diff --git a/.gitignore b/.gitignore index f0c94b0..5fcbd95 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,8 @@ __pycache__/ /Results/raw_output/ /Cache/ /new_notebooks_EVCH/ +/examples/ +/docs/ # Root level data files chargingdata.csv diff --git a/Learning_Pricing.py b/Learning_Pricing.py deleted file mode 100644 index 2d3032c..0000000 --- a/Learning_Pricing.py +++ /dev/null @@ -1,188 +0,0 @@ -from resources.configuration.configuration import Configuration -from utilities.rl_agents.agents.actor_critic_agents.SAC import ( - SAC, -) -from resources.configuration.SAC_configuration import ( - config, - pricing_config, -) -from utilities.rl_environments.rl_pricing_env import PricingEnv -from utilities.sim_input_processing import sample_week -from resources.logging.log import lg -import pandas as pd -import numpy as np - -from run_simulation import run_single_simulation - -SIM_SEASON = Configuration.instance().SIM_SEASON -SUMMER_START = Configuration.instance().SUMMER_START -SUMMER_END = Configuration.instance().SUMMER_END -POST_FIX = Configuration.instance().POST_FIX - -Configuration.instance().dynamic_pricing = True - -evaluate_after_training = Configuration.instance().evaluation_after_training -number_of_chargers = 200 -PV_CAPA = Configuration.instance().PV -STORAGE_CAPA = 0 -max_cap = 50 -max_grid_usage = 2000 -TRANSFORMER_NUM = Configuration.instance().grid - -# config.number_chargers = number_of_chargers -# config.maximum_power = max_cap -# config.maximum_grid_usage = max_grid_usage -# config.environment = ChargingHubInvestmentEnv(config=config) -# config.learnt_network = evaluate_after_training -# agent = SAC(config) -agent = None - -# storage_config.number_chargers = 80 -# storage_config.maximum_power = 50 -# storage_config.maximum_grid_usage = 200 -# storage_config.environment = StorageEnv(config=storage_config) -# storage_config.learnt_network = False -# storage_agent = SAC(storage_config) - -def run_experiments(): - pricing_config.number_chargers = number_of_chargers - pricing_config.maximum_power = max_cap - pricing_config.maximum_grid_usage = max_grid_usage - pricing_config.number_power_options = len(Configuration.instance().energy_prices) + 0 - pricing_config.environment = PricingEnv(config=pricing_config, DQN=False) - pricing_config.learnt_network = evaluate_after_training - pricing_config.evaluation_after_training = evaluate_after_training - - pricing_agent = SAC(pricing_config) - training_results = pd.DataFrame([]) - episode = 1 - NUMBER_EPISODES = 301 - if Configuration.instance().pricing_mode == "perfect_info": - NUMBER_EPISODES = 1 - output = [] - while episode <= NUMBER_EPISODES: - START = sample_week( - sim_seasons=SIM_SEASON, - summer_start=SUMMER_START, - summer_end=SUMMER_END, - seed=42, - ) - print(START) - # week = random.sample(TRAIN_WEEKS, 1) - # week = START - results = None - off_monitoring = True - evaluation_episodes = 10 - time_to_learn = pricing_agent.hyperparameters["min_steps_before_learning"] - if evaluate_after_training: - off_monitoring = False - evaluation_episodes = 1 - results = [f"{POST_FIX}", f"state{9}", f"week{1}"] - time_to_learn = 0 - # chargers = {'fast_one': 5, 'fast_two': 40, 'fast_four': 5, 'slow_one': 5, 'slow_two': 10, 'slow_four': 0} - chargers = { - "fast_one": number_of_chargers, - "fast_two": 0, - "fast_four": 0, - "slow_one": 0, - "slow_two": 0, - "slow_four": 0, - } - lg.error(f"episode: {episode}", extra={"clazz": "", "oid": ""}) - if ( - episode % evaluation_episodes == 0 - and pricing_agent.global_step_number >= 
time_to_learn - ): - if agent: - agent.do_evaluation_iterations = True - pricing_agent.do_evaluation_iterations = True - ### activate when we have separate battery agent - # storage_agent.do_evaluation_iterations = True - df = run_single_simulation( - charging_agent=agent, - storage_agent=None, - pricing_agent=pricing_agent, - num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START, - ) - pricing_agent.update_lr(new_objective=df["profit"], episode=episode) - print( - pricing_agent.alpha, - pricing_agent.learning_rate_actor, - max(pricing_agent.objective_function), - pricing_agent.hyperparameters["Critic"]["tau"], - pricing_agent.hyperparameters["batch_size"], - pricing_agent.action_size, - ) - else: - if agent: - agent.do_evaluation_iterations = False - pricing_agent.do_evaluation_iterations = False - ### activate when we have separate battery agent - # storage_agent.do_evaluation_iterations = False - df = run_single_simulation( - charging_agent=agent, - storage_agent=None, - pricing_agent=pricing_agent, - num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START, - ) - - episode += 1 - pricing_agent.episode_number += 1 - if not Configuration.instance().evaluation_after_training: - training_results = pd.concat([training_results, df]) - training_results.to_csv(Configuration.instance().OUTPUT_DATA_PATH+ - f"training_results_{pricing_agent.config.name}.csv" - ) - output.append(df["profit"].values[0]) - # print(output) - return output[9:-1:10][-10:] - - -def find_best_parameters(): - try: - training_results = pd.read_csv(f'training_results_{config.path}.csv') - except: - training_results = pd.DataFrame(columns=['learning_rate', 'batch_size', 'tau', 'result']) - training_dict = {} - best_results = -10000000000 - best_parameters = {'learning_rate': 0, 'batch_size': 0, 'tau': 0} - for lr in [5e-5, 1e-4, 5e-4, 1e-3]: - for bs in [64, 256, 512]: - for tau in [0.05, 0.1]: - pricing_config.hyperparameters['batch_size'] = bs - pricing_config.hyperparameters['Actor']['learning_rate'] = lr - pricing_config.hyperparameters['Critic']['learning_rate'] = lr - pricing_config.hyperparameters['Actor']['tau'] = tau - pricing_config.hyperparameters['Critic']['tau'] = tau - pricing_config.hyperparameters['min_steps_before_learning'] = max(bs, 256) - mean_reward = run_experiments() - # print('Mean reward: ', mean_reward) - hyperparameters = {'learning_rate': lr, 'batch_size': bs, 'tau': tau} - if np.array(mean_reward).mean() > best_results: - best_results = np.array(mean_reward).mean() - best_parameters = hyperparameters - results_dict = {'result': mean_reward} - training_results = pd.concat( - [pd.DataFrame([[lr, bs, tau, mean_reward]], columns=training_results.columns), - training_results], ignore_index=True) - print(f'{hyperparameters}, {results_dict}, best: {best_results}, best_parameters: {best_parameters}') - training_results.to_csv(f'{Configuration.instance().OUTPUT_DATA_PATH}training_results_{pricing_config.name}_tuning.csv', index=False) - -# find_best_parameters() -run_experiments() \ No newline at end of file diff --git a/README.md b/README.md index 0c730d8..4437abf 100644 --- a/README.md +++ b/README.md @@ -7,20 +7,181 @@ EVCCs are expected to become a core component of the future charging 
portfolio o This simulation is intended to explore these interdependencies through extensive sensitivity testing and through testing new algorithms and models for sizing and operating EVCCs. The module structure is as follows: -## Module structure +## πŸ—οΈ Architecture + +The EVCC simulation framework is built with a modular, decoupled architecture that separates concerns and enables easy integration with different RL algorithms and libraries. + +### High-Level Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ EVCC Simulation β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Preferences β”‚ β”‚Infrastructureβ”‚ β”‚ Operations β”‚ β”‚ Results β”‚ β”‚ +β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ RL Agent Integration β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ RL Library β”‚ β”‚ Gym Adapter β”‚ β”‚ EVCH Gym β”‚ β”‚ +β”‚ β”‚ (Stable-Bas3, │───▢│ (Standard │───▢│ Environment β”‚ β”‚ +β”‚ β”‚ RLlib, etc.) 
β”‚ β”‚ Interface) β”‚ β”‚ (Wrapper) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Module Structure + The following modules are included: + - **`Preferences` Module:** Initializes vehicle objects with respective charging and parking preferences (i.e., requests) based on empirical data - **`Infrastructure` Module:** Initializes infrastructure objects (EV supply equipment (EVSE), connectors per each EVSE, grid connection capacity, on-site storage and on-site generation (PV)) -- **`Operations` Module:** Conatain algorithms for assigning physical space (vehicle routing) and electrical capacity (vehicle charging) to individual vehicle objects based on a pre-defined charging policy +- **`Operations` Module:** Contains algorithms for assigning physical space (vehicle routing) and electrical capacity (vehicle charging) to individual vehicle objects based on a pre-defined charging policy - **`Results` Module:** Monitors EVCC activity in pre-defined intervals and accounts costs. Includes plotting routines. +## πŸ†• NEW: Agent Decision System + +The framework now includes a **unified agent decision system** that ensures **ALL decisions** in the EV charging operations are made by agents (RL agents, rule-based agents, algorithm agents, etc.) rather than being hardcoded in business logic. + +### Agent Decision System Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Agent Decision System β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ +β”‚ β”‚ Decision β”‚ β”‚ Decision β”‚ β”‚ Decision β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Agent Decision System β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ RL SAC 
β”‚ β”‚ Rule-Based β”‚ β”‚ Algorithm β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Supported Decision Types + +- **PRICING**: Energy prices, parking fees, dynamic pricing strategies +- **CHARGING**: Charging power allocation, schedules, priority assignment +- **STORAGE**: Energy storage operations, peak shaving, arbitrage +- **ROUTING**: Vehicle routing, parking allocation, queue management +- **VEHICLE_ASSIGNMENT**: Charging station assignment, connector allocation +- **PARKING_ALLOCATION**: Parking space allocation, duration optimization +- **GRID_MANAGEMENT**: Grid capacity management, load balancing +- **DEMAND_FORECASTING**: Energy demand prediction, load forecasting + +### Supported Agent Types + +- **RL_SAC**: Soft Actor-Critic reinforcement learning agent +- **RL_DQN**: Deep Q-Network reinforcement learning agent +- **RL_DDPG**: Deep Deterministic Policy Gradient agent +- **RULE_BASED**: Rule-based agents with predefined strategies +- **HEURISTIC**: Algorithm agents that wrap existing algorithms +- **OPTIMIZATION**: Mathematical optimization algorithms +- **ML_MODEL**: Machine learning models (neural networks, etc.) + +### Algorithm Agents + +The system includes **algorithm agents** that wrap all existing charging, routing, and storage algorithms: + +#### **Charging Algorithm Agents** +- `uncontrolled`, `first_come_first_served`, `earliest_deadline_first` +- `least_laxity_first`, `equal_sharing`, `online_myopic` +- `online_multi_period`, `integrated_storage`, `perfect_info` +- `perfect_info_with_storage` + +#### **Routing Algorithm Agents** +- `random`, `lowest_occupancy_first`, `fill_one_after_other` +- `lowest_utilization_first`, `matching_supply_demand`, `minimum_power_requirement` + +#### **Storage Algorithm Agents** +- `uncontrolled`, `temporal_arbitrage`, `peak_shaving` + +### Rule-Based Agents + +Pre-built rule-based agents for common strategies: + +#### **Pricing Agents** +- **Time-of-Use**: Peak/off-peak pricing based on time +- **Demand-Based**: Dynamic pricing based on current demand +- **Cost-Plus**: Fixed markup over base electricity cost + +#### **Charging Agents** +- **First-Come-First-Served**: Serve vehicles in arrival order +- **Priority-Based**: Prioritize vehicles by energy deficit and departure time +- **Load Balancing**: Distribute power evenly among vehicles + +#### **Storage Agents** +- **Peak Shaving**: Discharge during high load, charge during low load +- **Arbitrage**: Charge during low-price hours, discharge during high-price hours +- **Grid Support**: Support grid frequency stability + +### RL Agent Architecture + +The simulation supports complete decoupling of RL agents through a standardized gym-like interface: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ RL Agent Services β”‚ 
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Supported RL Libraries + +The framework supports integration with any gym-compatible RL library: + +- **Stable Baselines3**: SAC, PPO, DQN, A2C, TD3 +- **RLlib**: Distributed training, hyperparameter tuning +- **Custom Agents**: Any agent implementing the gym interface +- **Vectorized Environments**: Support for parallel training + +### Key Design Principles + +1. **Separation of Concerns**: RL logic is completely separated from simulation logic +2. **Standardized Interfaces**: All agents conform to gym-compatible interfaces +3. **Modularity**: Each service (pricing, charging, storage) is independent +4. **Extensibility**: Easy to add new RL algorithms or modify existing ones +5. **Scalability**: Support for distributed training and vectorized environments +6. **Agent-First Design**: All decisions go through agents, no hardcoded logic +7. **Backward Compatibility**: Existing algorithms are preserved and wrapped as agents +8. **Comprehensive Tracking**: Every decision is logged and can be monitored + ## πŸ“¦ Installation This project uses [`uv`](https://github.com/astral-sh/uv), a modern and ultra-fast Python package manager compatible with pip. ### 1. Install `uv` -If you don’t have `uv` installed, run: +If you don't have `uv` installed, run: ### Installation Steps @@ -37,4 +198,144 @@ source .venv/bin/activate # On macOS/Linux .venv\Scripts\activate # On Windows # Step 4: Install dependencies -uv pip install -r requirements.uv.txt \ No newline at end of file +uv pip install -r requirements.uv.txt +``` + +## πŸš€ Quick Start + +### Running the Simulation + +The main entry point for the simulation is `main.py`. 
You can run it with different configuration files: + +```bash +# Run with default configuration +python main.py resources/configuration/ini_files/app-remote.ini + +# Run with custom configuration +python main.py path/to/your/config.ini +``` + +### Configuration Files + +The simulation uses INI configuration files to set parameters for: +- Environment settings (seasons, duration, facility size) +- Infrastructure (chargers, grid capacity, storage, PV) +- Agent types and strategies +- Logging and monitoring options + +### Example Configuration + +```ini +[AGENT_DECISION_SYSTEM] +enabled = True +pricing_agent_type = RULE_BASED +charging_agent_type = HEURISTIC +enable_hyperparameter_tuning = False + +[SETTINGS] +log_level = INFO +facility_size = 200 +``` + +## πŸ“š Documentation + +- **[Agent Decision System Guide](docs/agent_decision_system.md)**: Comprehensive guide to the new agent system +- **[Decision Request System](docs/decision_request_system.md)**: Details about the underlying decision tracking system +- **[Algorithm Agents](utilities/rl_agents/algorithm_agents.py)**: Documentation of algorithm agents +- **[Rule-Based Agents](utilities/rl_agents/rule_based_agents.py)**: Documentation of rule-based agents + +## πŸ”§ Examples + +- **[Agent Decision System Example](examples/agent_decision_system_example.py)**: Basic usage examples +- **[Algorithm Agents Example](examples/algorithm_agents_example.py)**: Using algorithm agents +- **[Decision Request Example](examples/decision_request_example.py)**: Decision tracking examples + +## 🎯 Key Benefits + +### 1. **Consistency** +- All decisions follow the same pattern +- Standardized interfaces and data structures +- Consistent error handling and logging + +### 2. **Modularity** +- Easy to add new agent types +- Simple to switch between different strategies +- Clear separation of concerns + +### 3. **Testability** +- Agents can be tested independently +- Mock agents for unit testing +- Easy to compare different strategies + +### 4. **Observability** +- Every decision is tracked and logged +- Performance metrics for all agents +- Decision history for analysis + +### 5. **Flexibility** +- Support for multiple agent types +- Easy to implement new strategies +- Can mix different agent types + +### 6. **Maintainability** +- Clear agent interfaces +- Well-documented decision types +- Easy to understand and modify + +### 7. **Backward Compatibility** +- Existing algorithms are preserved as agents +- No need to rewrite existing code +- Gradual migration path + +## πŸ”„ Migration Guide + +To migrate existing code to use the agent decision system: + +1. **Identify Decision Points**: Find all places where decisions are made +2. **Create Agents**: Implement agents for each decision type +3. **Register Agents**: Register agents with the system +4. **Replace Decision Logic**: Replace hardcoded logic with agent calls +5. 
**Test and Monitor**: Verify behavior and monitor performance + +### Migration from Existing Algorithms + +```python +# Before: Direct algorithm call +first_come_first_served( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=500, + free_grid_capacity=300, + planning_period_length=15 +) + +# After: Using algorithm agent +charging_agent = AlgorithmChargingAgent(algorithm="first_come_first_served") +context = { + "env": env, + "charging_stations": charging_stations, + "charging_capacity": 500, + "free_grid_capacity": 300, + "planning_period_length": 15 +} +decision = charging_agent.select_action(vehicles, context) +``` + +## πŸš€ Future Enhancements + +- **Multi-Agent Coordination**: Agents that can coordinate with each other +- **Adaptive Agents**: Agents that can switch strategies based on performance +- **Distributed Agents**: Support for distributed agent deployment +- **Advanced Analytics**: More sophisticated performance analysis +- **Agent Marketplace**: Repository of pre-built agents for common use cases +- **Algorithm Performance Comparison**: Tools to compare different algorithms +- **Hybrid Agents**: Agents that combine multiple strategies + +## 🀝 Contributing + +We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details. + +## πŸ“„ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning b/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning index 63a06da..be3199f 100644 Binary files a/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning and b/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning differ diff --git a/main.py b/main.py index 1df01fe..7fd0896 100644 --- a/main.py +++ b/main.py @@ -1,51 +1,142 @@ -# Executes full simulation routine +# Executes full simulation routine with integrated agent decision system +# Supports both learnable agents (RL agents) and non-learnable agents (rule-based, algorithm agents) + from resources.configuration.configuration import Configuration -from utilities.sim_input_processing import sample_week from run_simulation import run_single_simulation +from simulation.operations.agent_decision_system import agent_decision_system +from utilities.rl_agents.interfaces import DecisionType +from utilities.agent_factory import create_agent, is_agent_learnable, get_agent_configuration +from utilities.training_manager import run_learnable_agent_training +from resources.logging.log import lg -# Change working directory to path of run.py - def run_experiments(): - number_of_chargers = 200 - PV_CAPA = Configuration.instance().PV - STORAGE_CAPA = 0 - max_cap = 50 - max_grid_usage = 2000 - TRANSFORMER_NUM = Configuration.instance().grid - START = sample_week( - sim_seasons=Configuration.instance().SIM_SEASON, - summer_start=Configuration.instance().SUMMER_START, - summer_end=Configuration.instance().SUMMER_END, - seed=42, - ) - print(START) - # week = random.sample(TRAIN_WEEKS, 1) - # week = START - results = None - results = [f"{Configuration.instance().POST_FIX}", f"state{9}", f"week{1}"] - chargers = { - "fast_one": number_of_chargers, - "fast_two": 0, - "fast_four": 0, - "slow_one": 0, - "slow_two": 0, - "slow_four": 0, - } - - run_single_simulation( - charging_agent=None, - storage_agent=None, - pricing_agent=None, - 
num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START) + """ + Main experiment runner that orchestrates the simulation. + + This function: + 1. Creates and configures agents based on configuration + 2. Registers agents with the decision system + 3. Runs training for learnable agents or single simulation for non-learnable agents + """ + config = Configuration.instance() + + print("Starting EVCC simulation with agent decision system...") + + # Get agent configuration + agent_config = get_agent_configuration(config) + + # Create and register agents + agents = {} + learnable_agents = {} + # Create pricing agent if configured + if agent_config["pricing"]["agent_type"]: + pricing_agent = create_agent( + "pricing", + agent_config["pricing"]["agent_type"], + strategy=agent_config["pricing"]["strategy"] + ) + agents["pricing"] = pricing_agent + agent_decision_system.register_agent(DecisionType.PRICING, pricing_agent) + print(f"Created pricing agent: {pricing_agent.__class__.__name__} with strategy: {agent_config['pricing']['strategy']}") + + # Check if this is a learnable agent + if is_agent_learnable(agent_config["pricing"]["agent_type"]): + learnable_agents["pricing"] = pricing_agent + print(f"Learnable agent detected: pricing - {pricing_agent.__class__.__name__}") + + # Create charging agent if configured + if agent_config["charging"]["agent_type"]: + charging_agent = create_agent( + "charging", + agent_config["charging"]["agent_type"], + algorithm=agent_config["charging"]["algorithm"], + strategy=agent_config["charging"]["strategy"] + ) + agents["charging"] = charging_agent + agent_decision_system.register_agent(DecisionType.CHARGING, charging_agent) + print(f"Created charging agent: {charging_agent.__class__.__name__} with strategy: {agent_config['charging']['strategy']}") + + if is_agent_learnable(agent_config["charging"]["agent_type"]): + learnable_agents["charging"] = charging_agent + print(f"Learnable agent detected: charging - {charging_agent.__class__.__name__}") + + # Create storage agent if configured + if agent_config["storage"]["agent_type"]: + storage_agent = create_agent( + "storage", + agent_config["storage"]["agent_type"], + algorithm=agent_config["storage"]["algorithm"], + strategy=agent_config["storage"]["strategy"] + ) + agents["storage"] = storage_agent + agent_decision_system.register_agent(DecisionType.STORAGE, storage_agent) + print(f"Created storage agent: {storage_agent.__class__.__name__} with strategy: {agent_config['storage']['strategy']}") + + if is_agent_learnable(agent_config["storage"]["agent_type"]): + learnable_agents["storage"] = storage_agent + print(f"Learnable agent detected: storage - {storage_agent.__class__.__name__}") + + # Create routing agent if configured + if agent_config["routing"]["agent_type"]: + routing_agent = create_agent( + "routing", + agent_config["routing"]["agent_type"], + algorithm=agent_config["routing"]["algorithm"], + strategy=agent_config["routing"]["strategy"] + ) + agents["routing"] = routing_agent + agent_decision_system.register_agent(DecisionType.ROUTING, routing_agent) + print(f"Created routing agent: {routing_agent.__class__.__name__} with strategy: {agent_config['routing']['strategy']}") + + if is_agent_learnable(agent_config["routing"]["agent_type"]): + learnable_agents["routing"] = routing_agent + print(f"Learnable agent detected: routing - 
{routing_agent.__class__.__name__}") + + # Enable dynamic pricing if using learnable pricing agents + if (agent_config["pricing"]["agent_type"] and + is_agent_learnable(agent_config["pricing"]["agent_type"])): + config.dynamic_pricing = True + print(f"Enabled dynamic pricing for learnable agent: {agent_config['pricing']['agent_type']}") + + # Run experiments based on agent types + if learnable_agents: + print(f"\nFound {len(learnable_agents)} learnable agents. Running training...") + + # Run training for each learnable agent + for decision_type, agent in learnable_agents.items(): + agent_type = agent_config[decision_type]["agent_type"] + run_learnable_agent_training(agent_type, agent, decision_type, config) + + else: + print("\nNo learnable agents detected. Running single simulation...") + + # Run single simulation with non-learnable agents + # Use a default start day if not specified + default_start_day = "2019-05-20" # Default Monday in May 2019 + + # Prepare results parameters for single simulation + results_params = [f"{getattr(config, 'POST_FIX', 'sim')}", f"state{9}", f"week{1}"] + + run_single_simulation( + charging_agent=agents.get("charging"), + storage_agent=agents.get("storage"), + pricing_agent=agents.get("pricing"), + num_charger={"fast_one": config.facility_size, "fast_two": 0, "fast_four": 0, + "slow_one": 0, "slow_two": 0, "slow_four": 0}, + turn_off_monitoring=False, + turn_on_results=results_params, # Pass list instead of boolean + turn_on_plotting=True, + transformer_num=config.TRANSFORMER_NUM, + storage_capa=config.STORAGE_SIZE, + pv_capa=config.PV_INSTALLED_CAPA, + year=9, + start_day=default_start_day + ) + + print("Simulation completed successfully!") + -run_experiments() \ No newline at end of file +if __name__ == "__main__": + run_experiments() \ No newline at end of file diff --git a/resources/configuration/configuration.py b/resources/configuration/configuration.py index b910fc1..135d1d5 100644 --- a/resources/configuration/configuration.py +++ b/resources/configuration/configuration.py @@ -42,7 +42,7 @@ def __init__(self): # if self.benchmarking: # self.peak_cost = 0 self.remove_low_request_EVs = False - self.evaluation_after_training = True + self.evaluation_after_training = False self.demand_threshold = 0 self.duration_threshold = 1000000 self.request_adjusting_mode = "Continuous" #'Discrete, Continuous' @@ -115,6 +115,53 @@ def __init__(self): self.lower_base_power = 0 self.higher_base_power = 0 + # Agent Decision System Configuration (defaults) + # ============================================= + self.use_agent_decision_system = True # Enable the new agent decision system + self.default_agent_types = { + "pricing": "RULE_BASED", # Default pricing agent type + "charging": "HEURISTIC", # Default charging agent type (algorithm wrapper) + "storage": "HEURISTIC", # Default storage agent type (algorithm wrapper) + "routing": "HEURISTIC", # Default routing agent type (algorithm wrapper) + "vehicle_assignment": "HEURISTIC", # Default vehicle assignment agent type + "parking_allocation": "RULE_BASED", # Default parking allocation agent type + "grid_management": "RULE_BASED", # Default grid management agent type + "demand_forecasting": "RULE_BASED" # Default demand forecasting agent type + } + + self.agent_configuration_file = None # Path to agent-specific configuration + self.enable_decision_tracking = True # Enable comprehensive decision tracking + self.enable_performance_monitoring = True # Enable agent performance monitoring + + # Agent Algorithm Configuration 
(defaults) + # ====================================== + # Default algorithms for algorithm agents (when agent_type = "HEURISTIC") + self.default_algorithms = { + "charging": "first_come_first_served", # Default charging algorithm + "routing": "lowest_occupancy_first", # Default routing algorithm + "storage": "peak_shaving" # Default storage algorithm + } + + # Agent Performance Thresholds (defaults) + # ===================================== + self.agent_confidence_threshold = 0.7 # Minimum confidence for agent decisions + self.agent_timeout_seconds = 30.0 # Timeout for agent decisions + self.agent_fallback_enabled = True # Enable fallback to direct calls if agent fails + self.enable_hyperparameter_tuning = False + self.save_training_results = False # Don't save results during training by default + + # Agent Strategy Configuration (defaults) + self.default_strategies = { + "pricing": "time_of_use", + "charging": "first_come_first_served", + "storage": "peak_shaving", + "routing": "lowest_occupancy_first", + "vehicle_assignment": "lowest_occupancy_first", # Same as routing + "parking_allocation": "lowest_occupancy_first", # Same as routing + "grid_management": "load_balancing", + "demand_forecasting": "historical_average" + } + # from main file self.set_parameters_from_ini_file() @@ -188,6 +235,51 @@ def adjust_peak_penalty(self, peak_penalty): if peak_penalty == 'h': self.peak_cost = self.peak_cost * 3 + def get_agent_configuration(self, decision_type: str) -> dict: + """ + Get agent configuration for a specific decision type. + + Args: + decision_type: The type of decision (pricing, charging, storage, routing, etc.) + + Returns: + Dictionary containing agent configuration + """ + if not self.use_agent_decision_system: + return {} + + agent_type = self.default_agent_types.get(decision_type.lower(), "RULE_BASED") + algorithm = self.default_algorithms.get(decision_type.lower(), None) + + config = { + "agent_type": agent_type, + "enabled": True, + "confidence_threshold": self.agent_confidence_threshold, + "timeout_seconds": self.agent_timeout_seconds, + "fallback_enabled": self.agent_fallback_enabled + } + + if algorithm: + config["algorithm"] = algorithm + + return config + + def get_all_agent_configurations(self) -> dict: + """ + Get configuration for all agent types. 
+ + Returns: + Dictionary containing configurations for all decision types + """ + if not self.use_agent_decision_system: + return {} + + configs = {} + for decision_type in self.default_agent_types.keys(): + configs[decision_type] = self.get_agent_configuration(decision_type) + + return configs + def set_parameters_from_ini_file(self) -> None: @@ -201,6 +293,9 @@ def set_parameters_from_ini_file(self) -> None: self.CACHE_PATH_WS = parser_main.get("SETTINGS", "caching_path") self.OUTPUT_DATA_PATH = parser_main.get("SETTINGS", "raw_output_save_path") self.OUTPUT_VIZ_PATH = parser_main.get("SETTINGS", "visuals_save_path") + + # Logging configuration + self.log_level = parser_main.get("SETTINGS", "log_level", fallback="ERROR") # self.TRAIN_WEEKS, self.TEST_WEEKS = sample_training_and_test_weeks(seed=None) self.SIM_SEASON = parser_main.get("ENVIRONMENT", "sim_season").split(",") @@ -264,6 +359,66 @@ def set_parameters_from_ini_file(self) -> None: self.LOOKAHEAD = parser_main.getint("OPERATOR", "num_lookahead_planning_periods") self.LOOKBACK = 24 * 60 + # Agent Decision System Configuration + # =================================== + if parser_main.has_section("AGENT_DECISION_SYSTEM"): + self.use_agent_decision_system = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enabled", fallback=True) + self.enable_decision_tracking = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_decision_tracking", fallback=True) + self.enable_performance_monitoring = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_performance_monitoring", fallback=True) + self.agent_confidence_threshold = parser_main.getfloat("AGENT_DECISION_SYSTEM", "confidence_threshold", fallback=0.7) + self.agent_timeout_seconds = parser_main.getfloat("AGENT_DECISION_SYSTEM", "timeout_seconds", fallback=30.0) + self.agent_fallback_enabled = parser_main.getboolean("AGENT_DECISION_SYSTEM", "fallback_enabled", fallback=True) + self.enable_hyperparameter_tuning = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_hyperparameter_tuning", fallback=False) + self.save_training_results = parser_main.getboolean("AGENT_DECISION_SYSTEM", "save_training_results", fallback=False) + + # Read agent types for each decision type + if parser_main.has_option("AGENT_DECISION_SYSTEM", "pricing_agent_type"): + self.default_agent_types["pricing"] = parser_main.get("AGENT_DECISION_SYSTEM", "pricing_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "charging_agent_type"): + self.default_agent_types["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "charging_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "storage_agent_type"): + self.default_agent_types["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "storage_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "routing_agent_type"): + self.default_agent_types["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "routing_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "vehicle_assignment_agent_type"): + self.default_agent_types["vehicle_assignment"] = parser_main.get("AGENT_DECISION_SYSTEM", "vehicle_assignment_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "parking_allocation_agent_type"): + self.default_agent_types["parking_allocation"] = parser_main.get("AGENT_DECISION_SYSTEM", "parking_allocation_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "grid_management_agent_type"): + self.default_agent_types["grid_management"] = parser_main.get("AGENT_DECISION_SYSTEM", 
"grid_management_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "demand_forecasting_agent_type"): + self.default_agent_types["demand_forecasting"] = parser_main.get("AGENT_DECISION_SYSTEM", "demand_forecasting_agent_type") + + # Read strategy parameters for each decision type + if parser_main.has_option("AGENT_DECISION_SYSTEM", "pricing_strategy"): + self.default_strategies["pricing"] = parser_main.get("AGENT_DECISION_SYSTEM", "pricing_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "charging_strategy"): + self.default_strategies["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "charging_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "storage_strategy"): + self.default_strategies["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "storage_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "routing_strategy"): + self.default_strategies["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "routing_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "vehicle_assignment_strategy"): + self.default_strategies["vehicle_assignment"] = parser_main.get("AGENT_DECISION_SYSTEM", "vehicle_assignment_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "parking_allocation_strategy"): + self.default_strategies["parking_allocation"] = parser_main.get("AGENT_DECISION_SYSTEM", "parking_allocation_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "grid_management_strategy"): + self.default_strategies["grid_management"] = parser_main.get("AGENT_DECISION_SYSTEM", "grid_management_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "demand_forecasting_strategy"): + self.default_strategies["demand_forecasting"] = parser_main.get("AGENT_DECISION_SYSTEM", "demand_forecasting_strategy") + + # Read default algorithms for algorithm agents + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_charging_algorithm"): + self.default_algorithms["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_charging_algorithm") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_routing_algorithm"): + self.default_algorithms["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_routing_algorithm") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_storage_algorithm"): + self.default_algorithms["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_storage_algorithm") + + # Read agent configuration file path if specified + if parser_main.has_option("AGENT_DECISION_SYSTEM", "agent_configuration_file"): + self.agent_configuration_file = parser_main.get("AGENT_DECISION_SYSTEM", "agent_configuration_file") + self.MAINTENANCE_COST = parser_main.getfloat("CAPEX", "maintenance_cost") self.ELECTRICITY_TARIFF = parser_main.get("OPEX", "hourly_energy_costs").split(",") self.ELECTRICITY_TARIFF = [int(x) / 100 for x in self.ELECTRICITY_TARIFF] diff --git a/resources/configuration/ini_files/Conda_Environment_Config.txt b/resources/configuration/ini_files/Conda_Environment_Config.txt deleted file mode 100644 index 7bab06a..0000000 --- a/resources/configuration/ini_files/Conda_Environment_Config.txt +++ /dev/null @@ -1,53 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2020.12.5-ha878542_0.tar.bz2 
-https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.33.1-h53a641e_7.conda -https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_19.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-9.1.0-hdf63c60_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-9.1.0-hdf63c60_0.conda -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_19.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/icu-67.1-he1b5a44_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.12-pthreads_hb3c22a3_1.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.2-he6710b0_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1k-h27cfd23_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.5-h7b6447c_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.conda -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-8_openblas.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h21135ba_2.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.1-h27cfd23_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.10-hbc83047_0.conda -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.10.4-h7ca028e_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-8_openblas.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-8_openblas.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.35.4-hdfb4753_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/python-3.7.10-hdb3f193_0.conda -https://conda.anaconda.org/conda-forge/noarch/et_xmlfile-1.0.1-py_1001.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/jdcal-1.4.1-py_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.5-py37h95a1406_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-2.4.7-pyh9f0ad1d_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.7-1_cp37m.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/pytz-2021.1-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/simpy-4.0.1-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/noarch/wheel-0.36.2-pyhd3eb1b0_0.conda -https://conda.anaconda.org/conda-forge/noarch/xlrd-2.0.1-pyhd8ed1ab_3.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/certifi-2020.12.5-py37h89c1867_1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.10.0-py_2.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.3.1-py37hc928c03_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/openpyxl-3.0.7-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.1-py_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.3-py37h8911b10_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.1-py37h4abf009_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/pandas-0.25.3-py37hb3f55d8_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.1-py_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-52.0.0-py37h06a4308_0.conda -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.2.2-py37h1d35a4c_1.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/pip-21.0.1-py37h06a4308_0.conda 
-https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.11.1-py37h8f50634_2.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.2.2-1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.1-pyhd8ed1ab_1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.1-hd8ed1ab_1.tar.bz2 diff --git a/resources/configuration/ini_files/app-local.ini b/resources/configuration/ini_files/app-local.ini deleted file mode 100644 index a3e74d8..0000000 --- a/resources/configuration/ini_files/app-local.ini +++ /dev/null @@ -1,67 +0,0 @@ -# Basic Settings -[SETTINGS] -raw_input_path = /Volumes/karsten.schroer@uni-koeln.de/Uni/Research/04_CASE_Clustered_EV_Charging/ -raw_output_save_path = ./Utilities/raw_output/ -visuals_save_path= ./Utilities/visuals_output/ -log_level = ERROR - -# File to Environment Bounding Box -[ENVIRONMENT] -#name of scenario (appears as postfix in output file names) -post_fix= _testing -#date -sim_start_day = 2019-06-03 -#days -sim_duration = 1 -day_types = Workday,Saturday,Sunday - -# Charging/Parking Requests -[REQUESTS] -#which facility to sample parking requests from -facility = Facility_3 -#share of EVs in total population -ev_share = 0.25 -#region for undelying EV population -region = CA -# artificially limit daily requests to a specific number -max_daily_requests = False - -# Facility Attributes -[INFRASTRUCTURE] -parking_capa = 1000 -# number of chargers -num_charger = 1000 -#connectors per charger -num_connector = 1 -#all in KW -charger_power = 22 -grid_capa = 700 -min_facility_baseload = 75 -max_facility_baseload = 300 -installed_capa_PV = 200 - -# Operator Configurations -[OPERATOR] -# routing decisions -routing_algo = None -#charging decisions -charging_algo = equal_sharing -# how is a planning period defined (in sim time) -planning_period_length = 15 -# how much foresight in operations? -num_lookahead_planning_periods=1 - -# Investment Costs -[CAPEX] -charger_cost = 4000 -connector_cost = 250 -#USD/kW -grid_expansion_cost = 240 - -# Operational Costs -[OPEX] -#USD/kWh -hourly_energy_costs = 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 23, 23, 23, 23, 23, 23, 8, 8 -#USD/kW -monthly_peak_cost = 15.84 - diff --git a/resources/configuration/ini_files/app-remote_second_WS.ini b/resources/configuration/ini_files/app-remote.ini similarity index 58% rename from resources/configuration/ini_files/app-remote_second_WS.ini rename to resources/configuration/ini_files/app-remote.ini index b0cfd20..ca62c10 100644 --- a/resources/configuration/ini_files/app-remote_second_WS.ini +++ b/resources/configuration/ini_files/app-remote.ini @@ -48,8 +48,8 @@ num_connector = 1 #all in kW charger_power_fast = 50 charger_power_slow = 22 -num_transformer = 0 -grid_capa = 0 +num_transformer = 3 +grid_capa = 500 #1.2 maximum load min_facility_baseload = 75 ;75 @@ -62,22 +62,29 @@ installed_capa_PV = 600 installed_storage = 500 # Operator Configurations +# ======================= +# NOTE: These legacy algorithm configurations are now used as defaults for algorithm agents +# when the agent decision system is enabled. The actual algorithms are wrapped as agents +# and can be configured in the [AGENT_DECISION_SYSTEM] section above. 
[OPERATOR] #routing decisions # choose from: perfect_info, random, lowest_occupancy_first, lowest_utilization_first, fill_one_after_other, # matching_supply_demand, minimum_power_requirement + # These algorithms are now wrapped as AlgorithmRoutingAgent when agent_type = HEURISTIC routing_algo = minimum_power_requirement #charging decisions # choose from: perfect_info, uncontrolled, first_come_first_served, earliest_deadline_first, least_laxity_first, equal_sharing, # online_myopic, online_multi_period, integrated_storage, dynamic, dynamic_multi_agent, average_power + # These algorithms are now wrapped as AlgorithmChargingAgent when agent_type = HEURISTIC charging_algo = average_power #storage decision # choose from: temporal_arbitrage, peak_shaving +# These algorithms are now wrapped as AlgorithmStorageAgent when agent_type = HEURISTIC storage_algo = peak_shaving # how is a planning period defined (in sim time) scheduling_mode = discrete_time # at which intervals (in unit sim time) to do the routing/charging re-planning -planning_interval = 59 +planning_interval = 15 # how long a period is in the optimization (in unit sim time) optimization_period_length = 15 # how much foresight in operations (in optimization periods)?, only relevant for dynamic model! @@ -88,6 +95,67 @@ service_level=1 minimum_served_demand = 1 penalty_for_missed_kWh = 0.5 + +# Agent Decision System Configuration +# =================================== +# This section configures the new agent-based decision system where all decisions +# are made by agents (RL agents, rule-based agents, algorithm agents) rather than +# being hardcoded in business logic. +[AGENT_DECISION_SYSTEM] +# Enable/disable the agent decision system +enabled = True + +# Enable comprehensive decision tracking and monitoring +enable_decision_tracking = True +enable_performance_monitoring = True + +# Agent performance thresholds +confidence_threshold = 0.7 +timeout_seconds = 30.0 +fallback_enabled = True +# Enable hyperparameter tuning for learnable agents (grid search over learning_rate, batch_size, tau) +enable_hyperparameter_tuning = False +# Save training results during training episodes (can slow down training) +save_training_results = False + +# Agent types for each decision type +# Available options: RL_SAC, RL_DQN, RL_DDPG, RULE_BASED, HEURISTIC, OPTIMIZATION, ML_MODEL +pricing_agent_type = RULE_BASED +charging_agent_type = HEURISTIC +storage_agent_type = HEURISTIC +routing_agent_type = HEURISTIC +vehicle_assignment_agent_type = HEURISTIC +parking_allocation_agent_type = HEURISTIC +grid_management_agent_type = HEURISTIC +demand_forecasting_agent_type = HEURISTIC + +# Strategy parameters for each decision type +# Available strategies depend on the agent type and decision type +pricing_strategy = time_of_use +charging_strategy = first_come_first_served +storage_strategy = peak_shaving +routing_strategy = lowest_occupancy_first +vehicle_assignment_strategy = lowest_occupancy_first +parking_allocation_strategy = lowest_occupancy_first +grid_management_strategy = load_balancing +demand_forecasting_strategy = historical_average + +# Default algorithms for algorithm agents (when agent_type = HEURISTIC) +# Charging algorithms: uncontrolled, first_come_first_served, earliest_deadline_first, +# least_laxity_first, equal_sharing, online_myopic, online_multi_period, +# integrated_storage, perfect_info, perfect_info_with_storage +default_charging_algorithm = first_come_first_served + +# Routing algorithms: random, lowest_occupancy_first, fill_one_after_other, 
+# lowest_utilization_first, matching_supply_demand, minimum_power_requirement +default_routing_algorithm = lowest_occupancy_first + +# Storage algorithms: uncontrolled, temporal_arbitrage, peak_shaving +default_storage_algorithm = peak_shaving + +# Path to agent-specific configuration file (optional) +# agent_configuration_file = config/agents.json + # Investment Costs [CAPEX] maintenance_cost = 0.05 diff --git a/resources/logging/log.py b/resources/logging/log.py index 67fa89d..f42514c 100644 --- a/resources/logging/log.py +++ b/resources/logging/log.py @@ -3,7 +3,26 @@ from resources.logging.simulation_context_filter import SimulationContextFilter lg = logging.getLogger() -lg.setLevel(logging.ERROR) + +# Try to get log level from configuration, fallback to ERROR if not available +try: + from resources.configuration.configuration import Configuration + config = Configuration.instance() + log_level_str = getattr(config, 'log_level', 'ERROR') + + # Convert string to logging level + log_level_map = { + 'DEBUG': logging.DEBUG, + 'INFO': logging.INFO, + 'WARNING': logging.WARNING, + 'ERROR': logging.ERROR, + 'CRITICAL': logging.CRITICAL + } + log_level = log_level_map.get(log_level_str.upper(), logging.ERROR) +except: + log_level = logging.ERROR + +lg.setLevel(log_level) # Avoid adding duplicate handlers if this module is imported multiple times if not lg.handlers: @@ -11,11 +30,11 @@ formatter = logging.Formatter('%(asctime)s [%(levelname)s] [Time %(env_time)10s] [%(clazz)30s %(oid)3s]: %(message)s') file_handler = logging.FileHandler("report.log") file_handler.setFormatter(formatter) - file_handler.setLevel(logging.ERROR) + file_handler.setLevel(log_level) stream_handler = logging.StreamHandler() stream_handler.setFormatter(formatter) - stream_handler.setLevel(logging.ERROR) + stream_handler.setLevel(log_level) lg.addHandler(file_handler) lg.addHandler(stream_handler) diff --git a/run_simulation.py b/run_simulation.py index 6706b3c..f88ec8f 100644 --- a/run_simulation.py +++ b/run_simulation.py @@ -248,29 +248,33 @@ def run_single_simulation( plot_time = round((plot_end_time - save_end_time) / 60, 2) print("Results Plotted (in {} minutes)".format(plot_time)) if model.charging_agent: - model.charging_agent.save_models() + if hasattr(model.charging_agent, 'save_models'): + model.charging_agent.save_models() if model.pricing_agent: - model.pricing_agent.save_models() + if hasattr(model.pricing_agent, 'save_models'): + model.pricing_agent.save_models() # model.storage_agent.save_models() - if model.charging_agent: + if model.charging_agent and hasattr(model.charging_agent, 'environment'): lg.error( f"profit = {model.charging_agent.environment.total_reward['missed']}," f" energy = {model.charging_agent.environment.total_reward['energy']} ,feasibility " f"= {model.charging_agent.environment.total_reward['feasibility']}, feasibility_storage " f"= {model.charging_agent.environment.total_reward['feasibility_storage']}, pricing " - f"= {model.pricing_agent.environment.total_reward['missed']}" + f"= {model.pricing_agent.environment.total_reward['missed'] if hasattr(model.pricing_agent, 'environment') else 'N/A'}" ) - if model.pricing_agent: - lg.error(f"profit ={model.pricing_agent.environment.total_reward['missed']}") - if model.charging_agent: + if model.pricing_agent and hasattr(model.pricing_agent, 'environment'): + lg.error(f"profit ={model.pricing_agent.environment.total_reward['profit']}") + if model.charging_agent and hasattr(model.charging_agent, 'environment'): 
model.charging_agent.environment.total_reward["missed"] = 0 model.charging_agent.environment.total_reward["feasibility"] = 0 model.charging_agent.environment.total_reward["feasibility_storage"] = 0 model.charging_agent.environment.total_reward["energy"] = 0 - if model.pricing_agent: - model.pricing_agent.environment.total_reward["missed"] = 0 - model.pricing_agent._critic_loss = 0 - model.pricing_agent._policy_loss = 0 + if model.pricing_agent and hasattr(model.pricing_agent, 'environment'): + model.pricing_agent.environment.total_reward["profit"] = 0 + if hasattr(model.pricing_agent, '_critic_loss'): + model.pricing_agent._critic_loss = 0 + if hasattr(model.pricing_agent, '_policy_loss'): + model.pricing_agent._policy_loss = 0 # model.storage_agent.environment.total_reward['test'] = 0 output = pd.DataFrame( [ diff --git a/simulation/model.py b/simulation/model.py index 4bb9f11..a79419a 100644 --- a/simulation/model.py +++ b/simulation/model.py @@ -583,7 +583,7 @@ def _init_operations( self.costs = dict(investment=0, operations=0) self.objective_function = 0 self.total_energy_charged = 0 - self.reward = dict(costs=0, missed=0, feasibility=0, feasibility_storage=0) + self.reward = dict(costs=0, profit=0, feasibility=0, feasibility_storage=0) # Create operator self.operator = Operator( @@ -628,27 +628,54 @@ def _init_agents( # Setup charging agent if self.charging_agent: - self.charging_agent.environment.state = ( - self.charging_agent.environment.get_state(self, self.env) - ) - self.charging_agent.environment.env = self.env - self.charging_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if hasattr(self.charging_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.charging_agent.environment.charging_hub = self + self.charging_agent.environment.env = self.env + self.charging_agent.environment.state = ( + self.charging_agent.environment.get_state(self, self.env) + ) + self.charging_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.charging_agent, 'set_charging_hub'): + self.charging_agent.set_charging_hub(self) + print(f"Initialized charging agent: {self.charging_agent.__class__.__name__}") # Setup pricing agent if self.pricing_agent: - self.pricing_agent.environment.state = self.pricing_agent.environment.get_state( - self, self.env - ) - self.pricing_agent.environment.env = self.env - self.pricing_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if hasattr(self.pricing_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.pricing_agent.environment.charging_hub = self + self.pricing_agent.environment.env = self.env + self.pricing_agent.environment.state = self.pricing_agent.environment.get_state( + self, self.env + ) + self.pricing_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.pricing_agent, 'set_charging_hub'): + self.pricing_agent.set_charging_hub(self) + print(f"Initialized pricing agent: {self.pricing_agent.__class__.__name__}") # Setup storage agent if self.storage_agent: - self.storage_agent.environment.state = ( - self.storage_agent.environment.get_state(self, self.env) - ) - self.storage_agent.environment.env = self.env - self.storage_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if 
hasattr(self.storage_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.storage_agent.environment.charging_hub = self + self.storage_agent.environment.env = self.env + self.storage_agent.environment.state = ( + self.storage_agent.environment.get_state(self, self.env) + ) + self.storage_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.storage_agent, 'set_charging_hub'): + self.storage_agent.set_charging_hub(self) + print(f"Initialized storage agent: {self.storage_agent.__class__.__name__}") # Link agents to operator self.operator.charging_agent = charging_agent diff --git a/simulation/operations/agent_decision_system.py b/simulation/operations/agent_decision_system.py new file mode 100644 index 0000000..c3e8ea5 --- /dev/null +++ b/simulation/operations/agent_decision_system.py @@ -0,0 +1,306 @@ +from typing import Any, Dict, List, Optional, Union, Type +import logging +from dataclasses import dataclass +from datetime import datetime +import uuid + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent, + ParkingAllocationAgent, + GridManagementAgent, + DemandForecastingAgent +) +from simulation.operations.decision_request_system import ( + DecisionRequestSystem, + DecisionRequest, + DecisionResponse, + RequestStatus, + decision_system +) + +logger = logging.getLogger(__name__) + + +@dataclass +class AgentDecision: + """Represents a decision made by an agent""" + decision_id: str + agent_type: AgentType + decision_type: DecisionType + context: Dict[str, Any] + action: Dict[str, Any] + confidence: float + timestamp: datetime + metadata: Dict[str, Any] + + +class AgentDecisionSystem: + """ + Centralized system for managing all agent-based decisions. + + This system ensures that ALL decisions in the EV charging operations + are made by agents (RL agents, rule-based agents, etc.) rather than + being hardcoded in the business logic. + + Key principles: + 1. No decisions in business logic - all decisions go through agents + 2. Standardized interface for all agents + 3. Comprehensive tracking and monitoring + 4. Fallback mechanisms for reliability + 5. Support for multiple agent types (RL, rule-based, ML, etc.) + """ + + def __init__(self): + self.agents: Dict[DecisionType, BaseAgent] = {} + self.decision_history: List[AgentDecision] = [] + self.agent_registry: Dict[str, Type[BaseAgent]] = {} + self.decision_callbacks: Dict[str, callable] = {} + + def register_agent(self, decision_type: DecisionType, agent: BaseAgent) -> None: + """ + Register an agent for a specific decision type. + + Args: + decision_type: The type of decision this agent can make + agent: The agent instance + """ + self.agents[decision_type] = agent + logger.info(f"Registered {agent.__class__.__name__} for {decision_type.value} decisions") + + def register_agent_class(self, name: str, agent_class: Type[BaseAgent]) -> None: + """ + Register an agent class for dynamic instantiation. 
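+        Registered classes can later be instantiated by name when agents are
+        configured dynamically. Illustrative only (the class name is hypothetical):
+        agent_decision_system.register_agent_class("heuristic_pricing", HeuristicPricingAgent)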
+ + Args: + name: Name identifier for the agent class + agent_class: The agent class to register + """ + self.agent_registry[name] = agent_class + logger.info(f"Registered agent class {name}: {agent_class.__name__}") + + def make_decision( + self, + decision_type: DecisionType, + context: Dict[str, Any], + vehicles: Optional[List[Any]] = None, + priority: int = 1, + timeout_seconds: float = 30.0 + ) -> AgentDecision: + """ + Make a decision using the appropriate agent. + + This is the main entry point for all decisions in the system. + Every decision request goes through this method. + + Args: + decision_type: Type of decision needed + context: Context information for the decision + vehicles: List of vehicles (for vehicle-related decisions) + priority: Decision priority (1-10, higher = more important) + timeout_seconds: Timeout for the decision + + Returns: + AgentDecision object containing the decision result + + Raises: + ValueError: If no agent is registered for the decision type + """ + if decision_type not in self.agents: + raise ValueError(f"No agent registered for decision type: {decision_type.value}") + + agent = self.agents[decision_type] + decision_id = str(uuid.uuid4()) + + # Create decision request for tracking + request_id = decision_system.create_request( + agent_type=decision_type, + state=agent.get_state(), + context=context, + priority=priority, + timeout_seconds=timeout_seconds, + metadata={ + "agent_type": agent.agent_type.value, + "decision_id": decision_id + } + ) + + try: + # Update agent state + agent.update_state(context) + + # Make decision based on agent type + if decision_type in [DecisionType.CHARGING, DecisionType.ROUTING, + DecisionType.VEHICLE_ASSIGNMENT, DecisionType.PARKING_ALLOCATION]: + # Vehicle-related decisions + if vehicles is None: + vehicles = [] + action_result = agent.select_action(vehicles, context) + else: + # Non-vehicle decisions + action_result = agent.select_action(context) + + # Process the request + response = decision_system.process_request(request_id) + + # Create decision record + decision = AgentDecision( + decision_id=decision_id, + agent_type=agent.agent_type, + decision_type=decision_type, + context=context, + action=action_result, + confidence=action_result.get("confidence", 0.5), + timestamp=datetime.now(), + metadata={ + "request_id": request_id, + "agent_class": agent.__class__.__name__, + "vehicles_count": len(vehicles) if vehicles else 0 + } + ) + + # Store decision in history + self.decision_history.append(decision) + + # Call any registered callbacks + if decision_type.value in self.decision_callbacks: + self.decision_callbacks[decision_type.value](decision) + + logger.info(f"Decision made: {decision_type.value} by {agent.__class__.__name__}") + return decision + + except Exception as e: + logger.error(f"Error making {decision_type.value} decision: {e}") + # Mark request as failed + if request_id in decision_system.requests: + decision_system.requests[request_id].status = RequestStatus.FAILED + decision_system.requests[request_id].metadata["error"] = str(e) + raise + + def make_pricing_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a pricing decision using the pricing agent.""" + return self.make_decision(DecisionType.PRICING, context) + + def make_charging_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a charging decision using the charging agent.""" + return self.make_decision(DecisionType.CHARGING, context, vehicles) + + def make_storage_decision(self, 
context: Dict[str, Any]) -> AgentDecision: + """Make a storage decision using the storage agent.""" + return self.make_decision(DecisionType.STORAGE, context) + + def make_routing_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a routing decision using the routing agent.""" + return self.make_decision(DecisionType.ROUTING, context, vehicles) + + def make_vehicle_assignment_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a vehicle assignment decision using the vehicle assignment agent.""" + return self.make_decision(DecisionType.VEHICLE_ASSIGNMENT, context, vehicles) + + def make_parking_allocation_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a parking allocation decision using the parking allocation agent.""" + return self.make_decision(DecisionType.PARKING_ALLOCATION, context, vehicles) + + def make_grid_management_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a grid management decision using the grid management agent.""" + return self.make_decision(DecisionType.GRID_MANAGEMENT, context) + + def make_demand_forecasting_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a demand forecasting decision using the demand forecasting agent.""" + return self.make_decision(DecisionType.DEMAND_FORECASTING, context) + + def register_decision_callback(self, decision_type: DecisionType, callback: callable) -> None: + """ + Register a callback to be called when a decision is made. + + Args: + decision_type: The decision type to monitor + callback: Function to call with the decision result + """ + self.decision_callbacks[decision_type.value] = callback + logger.info(f"Registered callback for {decision_type.value} decisions") + + def get_decision_history(self, decision_type: Optional[DecisionType] = None) -> List[AgentDecision]: + """ + Get decision history, optionally filtered by decision type. + + Args: + decision_type: Optional filter for specific decision type + + Returns: + List of decisions + """ + if decision_type: + return [d for d in self.decision_history if d.decision_type == decision_type] + return self.decision_history.copy() + + def get_agent_performance_stats(self) -> Dict[str, Any]: + """ + Get performance statistics for all agents. + + Returns: + Dictionary containing performance statistics + """ + stats = {} + + for decision_type, agent in self.agents.items(): + decisions = self.get_decision_history(decision_type) + + if decisions: + avg_confidence = sum(d.confidence for d in decisions) / len(decisions) + success_rate = len([d for d in decisions if d.confidence > 0.5]) / len(decisions) + + stats[decision_type.value] = { + "agent_type": agent.agent_type.value, + "agent_class": agent.__class__.__name__, + "total_decisions": len(decisions), + "average_confidence": avg_confidence, + "success_rate": success_rate, + "last_decision": decisions[-1].timestamp if decisions else None + } + else: + stats[decision_type.value] = { + "agent_type": agent.agent_type.value, + "agent_class": agent.__class__.__name__, + "total_decisions": 0, + "average_confidence": 0.0, + "success_rate": 0.0, + "last_decision": None + } + + return stats + + def reset_agents(self) -> None: + """Reset all registered agents.""" + for agent in self.agents.values(): + agent.reset() + logger.info("All agents reset") + + def cleanup_old_decisions(self, max_age_hours: float = 24.0) -> None: + """ + Clean up old decisions from history. 
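+        Intended for long-running simulations where the in-memory history would
+        otherwise grow without bound. Illustrative call:
+        agent_decision_system.cleanup_old_decisions(max_age_hours=6.0)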
+ + Args: + max_age_hours: Maximum age of decisions to keep + """ + cutoff_time = datetime.now().timestamp() - (max_age_hours * 3600) + + original_count = len(self.decision_history) + self.decision_history = [ + d for d in self.decision_history + if d.timestamp.timestamp() > cutoff_time + ] + + removed_count = original_count - len(self.decision_history) + if removed_count > 0: + logger.info(f"Cleaned up {removed_count} old decisions") + + +# Global instance for easy access +agent_decision_system = AgentDecisionSystem() diff --git a/simulation/operations/agents_controller.py b/simulation/operations/agents_controller.py new file mode 100644 index 0000000..bea4182 --- /dev/null +++ b/simulation/operations/agents_controller.py @@ -0,0 +1,43 @@ +from typing import Optional, Dict, Any, List + +from utilities.rl_agents.interfaces import PricingAgent, ChargingAgent, StorageAgent + + +class AgentsController: + def __init__(self, + pricing: Optional[PricingAgent] = None, + charging: Optional[ChargingAgent] = None, + storage: Optional[StorageAgent] = None): + self.pricing = pricing + self.charging = charging + self.storage = storage + + def reset_all(self) -> None: + for agent in (self.pricing, self.charging, self.storage): + if agent: + agent.reset() + + def pricing_step(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.pricing: + return None + self.pricing.update_state(context) + return self.pricing.select_action(context) + + def charging_step(self, vehicles: List[Any], context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.charging: + return None + self.charging.update_state(context) + return self.charging.select_action(vehicles, context) + + def storage_step(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.storage: + return None + self.storage.update_state(context) + return self.storage.select_action(context) + + def learn_all(self, transition: Dict[str, Any]) -> None: + for agent in (self.pricing, self.charging, self.storage): + if agent: + agent.learn(transition) + + diff --git a/simulation/operations/charging_service.py b/simulation/operations/charging_service.py new file mode 100644 index 0000000..c2a1f26 --- /dev/null +++ b/simulation/operations/charging_service.py @@ -0,0 +1,198 @@ +from typing import List, Dict, Any, Optional +from simulation.operations.agents_controller import AgentsController +from simulation.config_facade import ConfigFacade +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + + +class ChargingService: + """ + Service class for managing charging-related RL agent operations. + + Encapsulates all charging agent logic that was previously in the Operator class, + providing a clean separation of concerns and standardized interface for RL agents. + """ + + def __init__(self, operator_instance: Any, agents_controller: Optional[AgentsController] = None, config_facade: Optional[ConfigFacade] = None): + """ + Initialize the ChargingService. 
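+        The service keeps a back-reference to the operator (self.op) and reads
+        configuration through the ConfigFacade, so existing Operator call sites
+        continue to work unchanged.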
+ + Args: + operator_instance: Reference to the main operator instance + agents_controller: Controller for managing RL agents + config_facade: Facade for accessing configuration values + """ + self.op = operator_instance + self.agents_controller = agents_controller + self.config = config_facade or ConfigFacade() + + # Register agents with the decision request system + auto_register_agents(operator_instance) + + def take_learning_charging_actions(self, charging_strategy: str) -> None: + """ + Execute learning-based charging actions using RL agents. + + Args: + charging_strategy: The charging strategy to use + """ + if charging_strategy == "dynamic": + self.op.update_vehicles_status() + self.take_charging_action() + self.conduct_charging_action() + + if self.op.storage_agent: + self.op.get_exp_free_grid_capacity() + # Storage actions are handled by StorageService + if hasattr(self.op, 'storage_service'): + self.op.storage_service.take_storage_action() + self.op.storage_service.conduct_storage_action() + + def update_learning_charging_agent(self, charging_strategy: str) -> None: + """ + Update the learning charging agent. + + Args: + charging_strategy: The charging strategy to use + """ + if charging_strategy == "dynamic": + self.update_charging_agent() + + def take_charging_action(self) -> None: + """ + Take charging action using the RL charging agent. + """ + if self.agents_controller and self.agents_controller.charging: + # Use the controller to get charging action + context = { + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + action_result = self.agents_controller.charging_step( + vehicles=self.op.requests, + context=context + ) + if action_result: + self.op.charging_agent.action = action_result.get("charging_action") + else: + # Use decision request system for charging decisions + action = self._get_charging_decision_via_request() + self.op.charging_agent.action = action + + def conduct_charging_action(self) -> None: + """ + Execute the charging action by applying it to vehicles and chargers. + """ + action = self.op.charging_agent.action + action_index = 1 # Start from 1 because action[0] is reserved (possibly for pricing or metadata) + + for charger in self.op.charging_hub.chargers: + for connector_idx in range(charger.number_of_connectors): + if action_index >= len(action): + break # Prevent index error if action list is shorter than expected + + charging_power = action[action_index] + if charging_power > 0: + charging_vehicles = charger.charging_vehicles + if connector_idx < len(charging_vehicles): + vehicle = charging_vehicles[connector_idx] + vehicle.charging_power = charging_power + action_index += 1 + + self.op.check_charging_power() + self.op.charging_hub.grid.reset_reward() + + def update_charging_agent(self) -> None: + """ + Update the charging agent with new state and experience. 
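+        Conducts the stored action, runs actor-critic learning updates once enough
+        steps have been collected (skipped during evaluation episodes), and saves
+        the resulting transition via save_experience.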
+ """ + self.op.update_vehicles_status() + self.op.charging_hub.reward["missed"] = self.op.reward_computing() + + eval_ep = self.op.charging_agent.do_evaluation_iterations + self.op.charging_agent.conduct_action(self.op.charging_agent.action) + if self.op.charging_agent.time_for_critic_and_actor_to_learn(): + if not eval_ep: + for _ in range( + self.op.charging_agent.hyperparameters[ + "learning_updates_per_learning_session" + ] + ): + self.op.charging_agent.learn() + mask = ( + False + if self.op.charging_agent.episode_step_number_val + >= self.op.charging_agent.environment.MAX_EPISODE_STEPS + else self.op.charging_agent.done + ) + # if not eval_ep: + action = self.op.charging_agent.descale_action(self.op.charging_agent.action) + self.op.charging_agent.save_experience( + experience=( + self.op.charging_agent.state, + action, + self.op.charging_agent.reward, + self.op.charging_agent.next_state, + mask, + ) + ) + self.op.charging_agent.global_step_number += 1 + self.op.charging_agent.step_counter += 1 + + def _get_charging_decision_via_request(self) -> Any: + """ + Get charging decision through the decision request system. + + Returns: + The charging action/decision + """ + # Get current state from environment + state = self.op.charging_hub.charging_agent.environment.get_state( + self.op.charging_hub, self.op.env + ) + self.op.charging_agent.state = state + + eval_ep = self.op.charging_agent.do_evaluation_iterations + self.op.charging_agent.episode_step_number_val = 0 + + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "charging_hub": self.op.charging_hub, + "env": self.op.env, + "vehicles": self.op.requests + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.CHARGING, + state=self.op.charging_agent.state, + context=context, + metadata={ + "agent_name": getattr(self.op.charging_agent, "agent_name", "Unknown") + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + # Rescale action if needed + if hasattr(self.op.charging_agent, "rescale_action"): + return self.op.charging_agent.rescale_action(response.action) + else: + return response.action + else: + # Fallback to direct agent call if request system fails + # Handle different pick_action signatures + import inspect + sig = inspect.signature(self.op.charging_agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = self.op.charging_agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = self.op.charging_agent.pick_action() + + if hasattr(self.op.charging_agent, "rescale_action"): + return self.op.charging_agent.rescale_action(action) + else: + return action diff --git a/simulation/operations/decision_decorators.py b/simulation/operations/decision_decorators.py new file mode 100644 index 0000000..a457f11 --- /dev/null +++ b/simulation/operations/decision_decorators.py @@ -0,0 +1,212 @@ +from typing import Any, Dict, Optional, Callable +from functools import wraps +import logging +from .decision_request_system import ( + DecisionRequestSystem, + DecisionType, + decision_system +) + +logger = logging.getLogger(__name__) + + +def require_decision_request(decision_type: DecisionType, timeout_seconds: float = 30.0): + """ + Decorator that automatically creates a decision request when an RL agent method is called. 
+ + This decorator can be applied to methods like pick_action() to ensure that every + decision is tracked through the request system. + + Args: + decision_type: The type of decision being made + timeout_seconds: Timeout for the request + + Example: + @require_decision_request(DecisionType.PRICING) + def pick_action(self, eval_ep=False): + # Original pick_action implementation + pass + """ + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(self, *args, **kwargs): + # Create context from method arguments + context = { + "method_name": func.__name__, + "args": args, + "kwargs": kwargs, + "eval_ep": kwargs.get("eval_ep", False) + } + + # Get current state from the agent + state = getattr(self, "state", None) + + # Create decision request + request_id = decision_system.create_request( + agent_type=decision_type, + state=state, + context=context, + timeout_seconds=timeout_seconds, + metadata={ + "agent_class": self.__class__.__name__, + "method": func.__name__ + } + ) + + logger.info(f"Created decision request {request_id} for {decision_type.value}") + + try: + # Process the request immediately + response = decision_system.process_request(request_id) + + if response: + logger.info(f"Decision request {request_id} completed successfully") + return response.action + else: + logger.warning(f"Decision request {request_id} failed, falling back to direct call") + # Fallback to original method + return func(self, *args, **kwargs) + + except Exception as e: + logger.error(f"Error processing decision request {request_id}: {e}") + # Fallback to original method + return func(self, *args, **kwargs) + + return wrapper + return decorator + + +def track_decision(decision_type: DecisionType): + """ + Decorator that tracks decisions without requiring the request system. + + This is a lighter-weight decorator that just logs decisions without + going through the full request system. + + Args: + decision_type: The type of decision being made + """ + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(self, *args, **kwargs): + # Log the decision attempt + logger.info(f"Making {decision_type.value} decision via {func.__name__}") + + # Call the original method + result = func(self, *args, **kwargs) + + # Log the decision result + logger.info(f"Completed {decision_type.value} decision: {result}") + + return result + return wrapper + return decorator + + +class DecisionRequestMixin: + """ + Mixin class that provides decision request functionality to RL agents. + + This mixin can be added to RL agent classes to provide standardized + decision request capabilities. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._decision_type = None + self._last_request_id = None + + def set_decision_type(self, decision_type: DecisionType) -> None: + """Set the decision type for this agent""" + self._decision_type = decision_type + + def make_decision_request( + self, + state: Any, + context: Optional[Dict[str, Any]] = None, + priority: int = 1, + timeout_seconds: float = 30.0 + ) -> Optional[Any]: + """ + Make a decision request through the decision system. 
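+        Requires set_decision_type() to have been called first; a ValueError is
+        raised otherwise. Returns None when the request cannot be processed.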
+ + Args: + state: Current state for the agent + context: Additional context + priority: Request priority + timeout_seconds: Request timeout + + Returns: + The decision/action if successful, None if failed + """ + if not self._decision_type: + raise ValueError("Decision type not set for this agent") + + if context is None: + context = {} + + # Create request + request_id = decision_system.create_request( + agent_type=self._decision_type, + state=state, + context=context, + priority=priority, + timeout_seconds=timeout_seconds, + metadata={ + "agent_class": self.__class__.__name__, + "agent_id": id(self) + } + ) + + self._last_request_id = request_id + + # Process request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + return None + + def get_last_request_status(self) -> Optional[str]: + """Get the status of the last request made by this agent""" + if self._last_request_id: + status = decision_system.get_request_status(self._last_request_id) + return status.value if status else None + return None + + +def register_agent_with_system(agent: Any, decision_type: DecisionType) -> None: + """ + Register an agent with the decision request system. + + Args: + agent: The RL agent to register + decision_type: The type of decisions this agent can make + """ + decision_system.register_agent_handler(decision_type, agent) + logger.info(f"Registered {agent.__class__.__name__} for {decision_type.value} decisions") + + +def auto_register_agents(operator_instance: Any) -> None: + """ + Automatically register all agents from an operator instance with the decision system. + + Args: + operator_instance: The operator instance containing agents + """ + from .decision_request_system import DecisionType + + # Register pricing agent + if hasattr(operator_instance, "pricing_agent") and operator_instance.pricing_agent: + register_agent_with_system(operator_instance.pricing_agent, DecisionType.PRICING) + + # Register charging agent + if hasattr(operator_instance, "charging_agent") and operator_instance.charging_agent: + register_agent_with_system(operator_instance.charging_agent, DecisionType.CHARGING) + + # Register storage agent + if hasattr(operator_instance, "storage_agent") and operator_instance.storage_agent: + register_agent_with_system(operator_instance.storage_agent, DecisionType.STORAGE) + + logger.info("Auto-registered agents with decision request system") diff --git a/simulation/operations/decision_request_system.py b/simulation/operations/decision_request_system.py new file mode 100644 index 0000000..03ff17f --- /dev/null +++ b/simulation/operations/decision_request_system.py @@ -0,0 +1,344 @@ +from typing import Any, Dict, List, Optional, Union +from dataclasses import dataclass +from enum import Enum +import time +import uuid +from datetime import datetime + + +class DecisionType(Enum): + """Types of decisions that RL agents can make""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + ROUTING = "routing" + + +class RequestStatus(Enum): + """Status of a decision request""" + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + TIMEOUT = "timeout" + + +@dataclass +class DecisionRequest: + """Represents a decision request for an RL agent""" + request_id: str + agent_type: DecisionType + state: Any + context: Dict[str, Any] + timestamp: datetime + status: RequestStatus + priority: int = 1 + timeout_seconds: float = 30.0 + metadata: Optional[Dict[str, Any]] = None + + def 
__post_init__(self): + if self.metadata is None: + self.metadata = {} + + +@dataclass +class DecisionResponse: + """Represents a response to a decision request""" + request_id: str + action: Any + confidence: Optional[float] = None + reasoning: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None + + def __post_init__(self): + if self.metadata is None: + self.metadata = {} + + +class DecisionRequestSystem: + """ + Centralized system for managing decision requests from RL agents. + + This system provides a standardized way for RL agents to request decisions, + track request status, and handle responses. It supports: + - Request queuing and prioritization + - Timeout handling + - Request tracking and logging + - Integration with existing RL agent infrastructure + """ + + def __init__(self): + self.requests: Dict[str, DecisionRequest] = {} + self.responses: Dict[str, DecisionResponse] = {} + self.request_history: List[DecisionRequest] = [] + self.agent_handlers: Dict[DecisionType, Any] = {} + self.request_callbacks: Dict[str, callable] = {} + + def register_agent_handler(self, decision_type: DecisionType, handler: Any) -> None: + """ + Register an agent handler for a specific decision type. + + Args: + decision_type: The type of decision this handler can process + handler: The agent object that can make decisions + """ + self.agent_handlers[decision_type] = handler + + def create_request( + self, + agent_type: DecisionType, + state: Any, + context: Dict[str, Any], + priority: int = 1, + timeout_seconds: float = 30.0, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Create a new decision request. + + Args: + agent_type: Type of decision needed + state: Current state for the agent + context: Additional context information + priority: Request priority (higher = more important) + timeout_seconds: Timeout for the request + metadata: Additional metadata + + Returns: + Request ID for tracking + """ + request_id = str(uuid.uuid4()) + + request = DecisionRequest( + request_id=request_id, + agent_type=agent_type, + state=state, + context=context, + timestamp=datetime.now(), + status=RequestStatus.PENDING, + priority=priority, + timeout_seconds=timeout_seconds, + metadata=metadata or {} + ) + + self.requests[request_id] = request + return request_id + + def process_request(self, request_id: str) -> Optional[DecisionResponse]: + """ + Process a decision request using the appropriate agent. 
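+        Marks the request FAILED when no handler is registered or processing raises
+        an exception, and TIMEOUT when it has exceeded its timeout_seconds.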
+ + Args: + request_id: ID of the request to process + + Returns: + Decision response if successful, None if failed + """ + if request_id not in self.requests: + raise ValueError(f"Request {request_id} not found") + + request = self.requests[request_id] + + # Check if agent handler exists + if request.agent_type not in self.agent_handlers: + request.status = RequestStatus.FAILED + request.metadata["error"] = f"No handler registered for {request.agent_type}" + return None + + # Check timeout + if self._is_request_timed_out(request): + request.status = RequestStatus.TIMEOUT + return None + + # Update status + request.status = RequestStatus.PROCESSING + + try: + # Get the appropriate agent handler + agent = self.agent_handlers[request.agent_type] + + # Process the request based on agent type + response = self._process_with_agent(agent, request) + + if response: + request.status = RequestStatus.COMPLETED + self.responses[request_id] = response + else: + request.status = RequestStatus.FAILED + + return response + + except Exception as e: + request.status = RequestStatus.FAILED + request.metadata["error"] = str(e) + return None + + def _process_with_agent(self, agent: Any, request: DecisionRequest) -> Optional[DecisionResponse]: + """ + Process request with the specific agent type. + + Args: + agent: The agent to use for decision making + request: The decision request + + Returns: + Decision response + """ + try: + if request.agent_type == DecisionType.PRICING: + return self._process_pricing_request(agent, request) + elif request.agent_type == DecisionType.CHARGING: + return self._process_charging_request(agent, request) + elif request.agent_type == DecisionType.STORAGE: + return self._process_storage_request(agent, request) + elif request.agent_type == DecisionType.ROUTING: + return self._process_routing_request(agent, request) + else: + raise ValueError(f"Unknown decision type: {request.agent_type}") + + except Exception as e: + request.metadata["processing_error"] = str(e) + return None + + def _process_pricing_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process pricing decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action method") + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "pricing", "eval_ep": eval_ep} + ) + + def _process_charging_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process charging decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action 
method") + + # Rescale action if needed + if hasattr(agent, "rescale_action"): + action = agent.rescale_action(action) + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "charging", "eval_ep": eval_ep} + ) + + def _process_storage_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process storage decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + charging_hub = request.context.get("charging_hub", None) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 2: # Method expects eval_ep and charging_hub parameters + action = agent.pick_action(eval_ep, charging_hub) + elif len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action method") + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "storage", "eval_ep": eval_ep} + ) + + def _process_routing_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process routing decision request""" + # For routing, we might need different logic + # This is a placeholder for future implementation + raise NotImplementedError("Routing decisions not yet implemented") + + def get_response(self, request_id: str) -> Optional[DecisionResponse]: + """Get the response for a request""" + return self.responses.get(request_id) + + def get_request_status(self, request_id: str) -> Optional[RequestStatus]: + """Get the status of a request""" + if request_id in self.requests: + return self.requests[request_id].status + return None + + def _is_request_timed_out(self, request: DecisionRequest) -> bool: + """Check if a request has timed out""" + elapsed = (datetime.now() - request.timestamp).total_seconds() + return elapsed > request.timeout_seconds + + def cleanup_old_requests(self, max_age_hours: float = 24.0) -> None: + """Clean up old requests and responses""" + current_time = datetime.now() + cutoff_time = current_time.timestamp() - (max_age_hours * 3600) + + # Move old requests to history + old_requests = [ + req for req in self.requests.values() + if req.timestamp.timestamp() < cutoff_time + ] + + for req in old_requests: + self.request_history.append(req) + del self.requests[req.request_id] + if req.request_id in self.responses: + del self.responses[req.request_id] + + def get_statistics(self) -> Dict[str, Any]: + """Get statistics about the request system""" + total_requests = len(self.requests) + len(self.request_history) + completed = len([r for r in self.requests.values() if r.status == RequestStatus.COMPLETED]) + failed = len([r for r in self.requests.values() if r.status == RequestStatus.FAILED]) + pending = len([r for r in self.requests.values() if r.status == RequestStatus.PENDING]) + + return { + "total_requests": total_requests, + "active_requests": len(self.requests), + "completed": completed, + "failed": failed, + "pending": pending, + "success_rate": completed / total_requests if total_requests > 0 else 0.0 + } + + +# Global instance for easy access +decision_system = DecisionRequestSystem() diff --git a/simulation/operations/operator.py b/simulation/operations/operator.py 
index a92c65c..fd18a13 100644 --- a/simulation/operations/operator.py +++ b/simulation/operations/operator.py @@ -12,6 +12,9 @@ from simulation.operations.NonLinearAlgorithms import nonlinear_pricing from simulation.operations.Operator_utils import compute_free_grid_capacity +from simulation.operations.pricing_service import PricingService +from simulation.operations.charging_service import ChargingService +from simulation.operations.storage_service import StorageService from utilities.rl_environments.rl_pricing_env import convert_to_vector @@ -79,6 +82,7 @@ def __init__( service_level: float, charging_hub: Any, minimum_served_demand: float, + agents_controller: Optional[Any] = None, ): """ Initialize the Operator with simulation parameters and strategies. @@ -129,6 +133,12 @@ def __init__( ) self._init_agents_and_events() self._init_capacity_tracking() + # Optional RL agents controller (pricing/charging/storage) + self.agents_controller = agents_controller + # Service composition for RL agents + self.pricing_service = PricingService(operator=self, agents_controller=self.agents_controller) + self.charging_service = ChargingService(operator_instance=self, agents_controller=self.agents_controller) + self.storage_service = StorageService(operator_instance=self, agents_controller=self.agents_controller) # Initialize based on configuration self._initialize_strategy_dependent_behavior() @@ -545,92 +555,39 @@ def _set_storage_discharging(self, power: float) -> None: def take_dynamic_pricing_actions(self) -> None: """Execute dynamic pricing actions and update price history.""" - self.get_exp_free_grid_capacity() - self.update_vehicles_status() - self.take_pricing_action() - self._update_dynamic_price_history() + self.pricing_service.take_dynamic_pricing_actions() def take_static_pricing_action(self) -> None: """Execute static pricing actions and update price history.""" - self.get_exp_free_grid_capacity() - self.update_vehicles_status() - self._update_pricing_parameters() - self._update_static_price_history() + self.pricing_service.take_static_pricing_action() def _update_dynamic_price_history(self) -> None: - """Update price history for dynamic pricing modes.""" - if self.pricing_mode == "Discrete": - self._add_discrete_price_to_history() - elif self.pricing_mode == "Continuous": - self._add_continuous_price_to_history() + # Backward-compat shim; delegate to service + self.pricing_service._update_dynamic_price_history() def _update_static_price_history(self) -> None: - """Update price history for static pricing modes.""" - if self.pricing_mode == "Discrete": - self._add_discrete_price_to_history() - elif self.pricing_mode in ["Continuous", "ToU"]: - self._add_continuous_price_to_history() + self.pricing_service._update_static_price_history() def _update_pricing_parameters(self) -> None: - """Update pricing parameters based on current mode and time.""" - if self.pricing_mode == "ToU": - self._update_tou_pricing() - elif self.pricing_mode == "perfect_info": - self._update_perfect_info_pricing() + self.pricing_service._update_pricing_parameters() def _update_tou_pricing(self) -> None: - """Update Time-of-Use pricing parameters.""" - hour = self._get_current_hour() - max_price = Configuration.instance().max_price_ToU - self.pricing_parameters[0] = ( - self.electricity_tariff[hour] / max(self.electricity_tariff) * max_price - ) + self.pricing_service._update_tou_pricing() def _update_perfect_info_pricing(self) -> None: - """Update perfect information pricing parameters.""" - hour = 
self._get_current_hour() - config = Configuration.instance() - - if config.dynamic_fix_term_pricing: - self.pricing_parameters[1] = self.price_schedules[1][hour] - self.pricing_parameters[0] = self.price_schedules[0][hour] - else: - self.pricing_parameters[1] = self.price_schedules[hour] + self.pricing_service._update_perfect_info_pricing() def _get_current_hour(self) -> int: - """Get current hour of the simulation.""" - return int((self.env.now % 1440) / 60) + return self.pricing_service._get_current_hour() def _add_discrete_price_to_history(self) -> None: - """Add discrete pricing data to price history.""" - self.price_history = pd.concat([ - self.price_history, - pd.DataFrame(self.price_pairs[:, 1]).transpose(), - ]) + self.pricing_service._add_discrete_price_to_history() def _add_continuous_price_to_history(self) -> None: - """Add continuous pricing data to price history.""" - self.price_history = pd.concat([ - self.price_history, - pd.DataFrame([ - self.pricing_parameters[0], - self.pricing_parameters[1] - ]).transpose(), - ]) + self.pricing_service._add_continuous_price_to_history() def get_current_pricing_data(self) -> PricingData: - """ - Get current pricing information as structured data. - - Returns: - PricingData: Current pricing information - """ - return PricingData( - energy_price=self.pricing_parameters[0] if len(self.pricing_parameters) > 0 else 0.0, - parking_price=self.parking_fee, - pricing_mode=self.pricing_mode, - price_history=self.price_history - ) + return self.pricing_service.get_current_pricing_data() # ============================================================================ # CHARGING ACTION METHODS @@ -793,22 +750,15 @@ def _update_peak_threshold(self) -> None: self.peak_threshold = current_peak def take_learning_charging_actions(self, charging_strategy): if charging_strategy == "dynamic": - self.update_vehicles_status() - self.take_charging_action() - self.conduct_charging_action() - - if self.storage_agent: - self.get_exp_free_grid_capacity() - self.take_storage_action() - self.conduct_storage_action() + self.charging_service.take_learning_charging_actions(charging_strategy) def update_learning_charging_and_pricing_agents(self, charging_strategy): if charging_strategy == "dynamic": - self.update_charging_agent() + self.charging_service.update_learning_charging_agent(charging_strategy) if self.storage_agent: - self.update_storage_agent() + self.storage_service.update_storage_agent() if self.charging_hub.dynamic_pricing: - self.update_pricing_agent() + self.pricing_service.update_pricing_agent() def get_charging_schedules_and_prices(self, charging_strategy, mode): """ @@ -849,93 +799,11 @@ def get_charging_schedules_and_prices(self, charging_strategy, mode): self.update_learning_charging_and_pricing_agents(charging_strategy) - def take_charging_action(self): - state = self.charging_hub.charging_agent.environment.get_state( - self.charging_hub, self.env - ) - self.charging_agent.state = state - - eval_ep = self.charging_agent.do_evaluation_iterations - self.charging_agent.episode_step_number_val = 0 - # while not self.done: - action = self.charging_agent.pick_action(eval_ep, self.charging_hub) - self.charging_agent.action = self.charging_agent.rescale_action(action) - - def take_pricing_action(self): - # Get current state from environment - pricing_state = self.pricing_agent.environment.get_state(self.charging_hub, self.env) - self.pricing_agent.state = pricing_state - eval_ep = self.pricing_agent.do_evaluation_iterations - - pricing_mode = 
Configuration.instance().pricing_mode - agent_name = self.pricing_agent.agent_name - - if pricing_mode == "Discrete": - if agent_name == "DQN": - self.pricing_agent.action = self.pricing_agent.pick_action() - if len(self.price_pairs[:, 1]) > 1: - vector_prices = convert_to_vector(self.pricing_agent.action) - else: - vector_prices = [self.pricing_agent.action] - final_pricing = self.pricing_agent.environment.get_final_prices_DQN(vector_prices) - for i, price in enumerate(final_pricing): - self.price_pairs[i, 1] = price - - elif agent_name == "SAC": - self.pricing_agent.action = self.pricing_agent.pick_action(eval_ep, self.charging_hub) - rescaled_actions = self.pricing_agent.environment.rescale_action(self.pricing_agent.action) - number_of_power_options = len(self.price_pairs[:, 1]) - final_pricing = rescaled_actions[:number_of_power_options] - self.price_pairs[0, 1] = final_pricing[0] - self.price_pairs[1, 1] = min(final_pricing[1], 1.5) - - # Optional: handle grid capacity and storage - # if Configuration.instance().limiting_grid_capa: - # self.grid_capa = rescaled_actions[number_of_power_options] - # if len(rescaled_actions) >= number_of_power_options + 2: - # self.storage_agent.action = [rescaled_actions[number_of_power_options + 1]] - # self.conduct_storage_action() - - elif pricing_mode == "Continuous": - self.pricing_agent.action = self.pricing_agent.pick_action(eval_ep, self.charging_hub) - rescaled_actions = self.pricing_agent.environment.rescale_action(self.pricing_agent.action) - - config = Configuration.instance() - if not config.dynamic_fix_term_pricing and config.capacity_pricing: - self.pricing_parameters[1] = rescaled_actions[0] - - elif config.dynamic_fix_term_pricing and not config.capacity_pricing: - self.pricing_parameters[0] = rescaled_actions[0] - if config.dynamic_parking_fee: - self.parking_fee = rescaled_actions[1] - - elif config.dynamic_fix_term_pricing and config.capacity_pricing: - self.pricing_parameters[0] = rescaled_actions[0] - self.pricing_parameters[1] = rescaled_actions[1] - - if config.limiting_grid_capa: - self.grid_capa = rescaled_actions[1] - - if config.dynamic_storage_scheduling: - self.storage_agent.action = [rescaled_actions[1]] - - self.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) - - # Reset reward at the end - self.charging_hub.grid.reset_reward() #TODO: it does not belong to the grid object - - def take_storage_action(self): - storage_state = self.charging_hub.storage_agent.environment.get_state( - self.charging_hub, self.env - ) - self.storage_agent.state = storage_state - eval_ep = self.storage_agent.do_evaluation_iterations - self.storage_agent.episode_step_number_val = 0 - # while not self.done: - self.storage_agent.action = self.storage_agent.pick_action( - eval_ep, self.charging_hub - ) + + + + def get_battery_max_min(self): bound_1 = ( @@ -1072,160 +940,17 @@ def check_storage(self, given_storage_action=None): # Track feasibility deviation self.charging_hub.reward["feasibility_storage"] += abs(raw_storage_power - storage_power) - def conduct_storage_action(self, given_storage_action=None): - if given_storage_action: - storage_power = given_storage_action[0] - else: - storage_power = self.storage_agent.action[0] - if storage_power >= 0: - self.charging_hub.electric_storage.charge_yn = 1 - self.charging_hub.electric_storage.charging_power = storage_power - self.charging_hub.electric_storage.discharge_yn = 0 - self.charging_hub.electric_storage.discharging_power = 0 - elif storage_power < 0: - 
self.charging_hub.electric_storage.charge_yn = 0 - self.charging_hub.electric_storage.charging_power = 0 - self.charging_hub.electric_storage.discharge_yn = 1 - self.charging_hub.electric_storage.discharging_power = -storage_power - self.check_storage(given_storage_action=given_storage_action) - - def conduct_charging_action(self): - action = self.charging_agent.action - action_index = 1 # Start from 1 because action[0] is reserved (possibly for pricing or metadata) - for charger in self.charging_hub.chargers: - for connector_idx in range(charger.number_of_connectors): - if action_index >= len(action): - break # Prevent index error if action list is shorter than expected - charging_power = action[action_index] - if charging_power > 0: - charging_vehicles = charger.charging_vehicles - if connector_idx < len(charging_vehicles): - vehicle = charging_vehicles[connector_idx] - vehicle.charging_power = charging_power - action_index += 1 - self.check_charging_power() - self.charging_hub.grid.reset_reward() def update_pricing_agent(self): - self.update_vehicles_status() - - if not self.charging_agent: - # TODO: do we need to recalculate it? - self.charging_hub.reward["missed"] = self.reward_computing() - - agent = self.pricing_agent - agent_name = agent.agent_name - config = agent.config - - if agent_name == "SAC": - agent.conduct_action(agent.action, self.charging_hub, self.env) - eval_ep = agent.do_evaluation_iterations - - if agent.time_for_critic_and_actor_to_learn() and not eval_ep: - for _ in range(agent.hyperparameters["learning_updates_per_learning_session"]): - agent.learn() - - mask = False if agent.global_step_number >= agent.environment._max_episode_steps else agent.done + # Delegated to PricingService for backward compatibility + self.pricing_service.update_pricing_agent() - agent.save_experience( - experience=( - agent.state, - agent.action, - agent.reward, - agent.next_state, - mask, - ) - ) - - elif agent_name == "DQN": - agent.conduct_action(agent.action, self.charging_hub, self.env) - - if agent.time_for_q_network_to_learn(): - for _ in range(agent.hyperparameters["learning_iterations"]): - agent.learn() - agent.save_experience( - experience=( - agent.state, - agent.action, - agent.reward, - agent.next_state, - False, - ) - ) - agent.global_step_number += 1 - def update_storage_agent(self): - - eval_ep = self.storage_agent.do_evaluation_iterations - action = self.storage_agent.descale_action( - self.storage_agent.action, self.charging_hub - ) - self.storage_agent.conduct_action(action, self.charging_hub, self.env, eval_ep=eval_ep) - if self.storage_agent.time_for_critic_and_actor_to_learn(): - for _ in range( - self.storage_agent.hyperparameters[ - "learning_updates_per_learning_session" - ] - ): - self.storage_agent.learn() - mask = ( - False - if self.storage_agent.episode_step_number_val - >= self.storage_agent.environment._max_episode_steps - else self.storage_agent.done - ) - # if not eval_ep: - - self.storage_agent.save_experience( - experience=( - self.storage_agent.state, - action, - self.storage_agent.reward, - self.storage_agent.next_state, - mask, - ) - ) - self.storage_agent.global_step_number += 1 - self.storage_agent.step_counter += 1 - - def update_charging_agent(self): - self.update_vehicles_status() - self.charging_hub.reward["missed"] = self.reward_computing() - - eval_ep = self.charging_agent.do_evaluation_iterations - self.charging_agent.conduct_action(self.charging_agent.action, self.charging_hub, self.env) - if 
self.charging_agent.time_for_critic_and_actor_to_learn(): - if not eval_ep: - for _ in range( - self.charging_agent.hyperparameters[ - "learning_updates_per_learning_session" - ] - ): - self.charging_agent.learn() - mask = ( - False - if self.charging_agent.episode_step_number_val - >= self.charging_agent.environment._max_episode_steps - else self.charging_agent.done - ) - # if not eval_ep: - action = self.charging_agent.descale_action(self.charging_agent.action, self.charging_hub) - self.charging_agent.save_experience( - experience=( - self.charging_agent.state, - action, - self.charging_agent.reward, - self.charging_agent.next_state, - mask, - ) - ) - self.charging_agent.global_step_number += 1 - self.charging_agent.step_counter += 1 def get_storage_schedule(self, storage_strategy, mode): """ @@ -1376,11 +1101,17 @@ def get_hub_generation_kW(self): t = self.env.now - generation_current_period = ( - self.non_dispatchable_generator.generation_profile_actual.loc[t][ - "pv_generation" - ] - ) + # Check if the time index exists in the generation profile + if t in self.non_dispatchable_generator.generation_profile_actual.index: + generation_current_period = ( + self.non_dispatchable_generator.generation_profile_actual.loc[t][ + "pv_generation" + ] + ) + else: + # If time index doesn't exist, return 0 (no generation) + # This handles cases where simulation runs longer than available data + generation_current_period = 0.0 return generation_current_period @@ -1526,10 +1257,6 @@ def request_process(self, request): request.energy_charged += ( request.charging_power / 60 ) # sim unit time is minutes so need to divide by 60 - request.calculate_profit_reward( - self.charging_hub.penalty_for_missed_kWh, - self.electricity_tariff, - ) if request.charging_power < 0: lg.warning( f"charging power of {request.id} is negative{request.charging_power}" diff --git a/simulation/operations/pricing_service.py b/simulation/operations/pricing_service.py new file mode 100644 index 0000000..edf988d --- /dev/null +++ b/simulation/operations/pricing_service.py @@ -0,0 +1,378 @@ +import logging +from typing import Any, Dict, List, Optional, Tuple +from simpy import Environment +import pandas as pd +from resources.configuration.configuration import Configuration +from utilities.rl_environments.rl_pricing_env import convert_to_vector +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + +# Set up logger for this module +logger = logging.getLogger(__name__) + +class PricingService: + """ + Encapsulates all pricing-related behavior away from Operator. + Accesses Operator state via the provided reference. 
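+    When an external agents_controller with a pricing agent is supplied, that agent
+    takes precedence; otherwise the service falls back to the operator's own pricing
+    agent and the decision request system.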
+ """ + + def __init__(self, operator: Any, agents_controller: Any | None = None): + self.op = operator + self.agents_controller = agents_controller + + # Register agents with the decision request system + auto_register_agents(operator) + + # Public APIs used by Operator + def take_dynamic_pricing_actions(self) -> None: + self.op.get_exp_free_grid_capacity() + self.op.update_vehicles_status() + + # Prefer external agent when available + used_agent = False + if getattr(self, "agents_controller", None) and getattr(self.agents_controller, "pricing", None): + context: Dict[str, Any] = {"charging_hub": self.op.charging_hub, "env": self.op.env} + action_dict = self.agents_controller.pricing_step(context) + if action_dict and "action" in action_dict: + action = action_dict["action"] + try: + import numpy as _np + if _np.isscalar(action): + action = convert_to_vector(int(action), h=1) + action_list = action.tolist() if hasattr(action, "tolist") else list(action) + except Exception: + action_list = [action] + + if not hasattr(self.op, "pricing_parameters") or self.op.pricing_parameters is None: + self.op.pricing_parameters = [0.0, 0.0] + if len(action_list) >= 1: + self.op.pricing_parameters[0] = action_list[0] + if len(action_list) >= 2: + if len(self.op.pricing_parameters) < 2: + self.op.pricing_parameters.append(0.0) + self.op.pricing_parameters[1] = action_list[1] + used_agent = True + + if not used_agent: + # Fallback to previous behavior + self.take_pricing_action() + + self._update_dynamic_price_history() + + def take_static_pricing_action(self) -> None: + self.op.get_exp_free_grid_capacity() + self.op.update_vehicles_status() + self._update_pricing_parameters() + self._update_static_price_history() + + def take_pricing_action(self) -> None: + # Check if we have a pricing agent and if it has an environment + if not self.op.pricing_agent: + return + + # Determine if this is an RL agent with environment or a rule-based/algorithm agent + if hasattr(self.op.pricing_agent, 'environment'): + # RL agent with environment - use existing logic + pricing_state = self.op.pricing_agent.environment.get_state(self.op.charging_hub, self.op.env) + self.op.pricing_agent.state = pricing_state + eval_ep = self.op.pricing_agent.do_evaluation_iterations + + pricing_mode = Configuration.instance().pricing_mode + agent_name = self.op.pricing_agent.agent_name + + if pricing_mode == "Discrete": + if agent_name == "DQN": + # Use decision request system for DQN pricing + action = self._get_pricing_decision_via_request(eval_ep=False) + self.op.pricing_agent.action = action + + if len(self.op.price_pairs[:, 1]) > 1: + vector_prices = convert_to_vector(self.op.pricing_agent.action) + else: + vector_prices = [self.op.pricing_agent.action] + final_pricing = self.op.pricing_agent.environment.get_final_prices_DQN(vector_prices) + for i, price in enumerate(final_pricing): + self.op.price_pairs[i, 1] = price + + elif agent_name == "SAC": + # Use decision request system for SAC pricing + action = self._get_pricing_decision_via_request(eval_ep) + self.op.pricing_agent.action = action + + rescaled_actions = self.op.pricing_agent.environment.rescale_action(self.op.pricing_agent.action) + number_of_power_options = len(self.op.price_pairs[:, 1]) + final_pricing = rescaled_actions[:number_of_power_options] + self.op.price_pairs[0, 1] = final_pricing[0] + self.op.price_pairs[1, 1] = min(final_pricing[1], 1.5) + + elif pricing_mode == "Continuous": + # Use decision request system for continuous pricing + action = 
self._get_pricing_decision_via_request(eval_ep) + self.op.pricing_agent.action = action + + rescaled_actions = self.op.pricing_agent.environment.rescale_action(self.op.pricing_agent.action) + + config = Configuration.instance() + if not config.dynamic_fix_term_pricing and config.capacity_pricing: + self.op.pricing_parameters[1] = rescaled_actions[0] + + elif config.dynamic_fix_term_pricing and not config.capacity_pricing: + self.op.pricing_parameters[0] = rescaled_actions[0] + if config.dynamic_parking_fee: + self.op.parking_fee = rescaled_actions[1] + + elif config.dynamic_fix_term_pricing and config.capacity_pricing: + self.op.pricing_parameters[0] = rescaled_actions[0] + self.op.pricing_parameters[1] = rescaled_actions[1] + + if config.limiting_grid_capa: + self.op.grid_capa = rescaled_actions[1] + + if config.dynamic_storage_scheduling: + self.op.storage_agent.action = [rescaled_actions[1]] + + # Use storage service instead of direct call + if hasattr(self.op, 'storage_service'): + self.op.storage_service.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) + else: + # Fallback for backward compatibility + self.op.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) + else: + # Rule-based or algorithm agent - use agent decision system + try: + from simulation.operations.agent_decision_system import agent_decision_system + from utilities.rl_agents.interfaces import DecisionType + + # Create context for the agent + context = { + "eval_ep": False, + "charging_hub": self.op.charging_hub, + "env": self.op.env, + "pricing_mode": Configuration.instance().pricing_mode, + "current_demand": self.op.get_hub_load_kW(), + "grid_capacity": self.op.get_exp_free_grid_capacity().free_grid_capa_actual[0] if hasattr(self.op, 'free_grid_capa_actual') and len(self.op.free_grid_capa_actual) > 0 else 1000 + } + + # Get decision from agent decision system + decision = agent_decision_system.make_decision( + DecisionType.PRICING, + context, + timeout_seconds=30.0 + ) + + # Apply the decision + if decision and decision.action: + action = decision.action + + # Extract pricing information from action + if "energy_price" in action: + # Update energy prices + energy_price = action["energy_price"] + if len(self.op.price_pairs[:, 1]) > 1: + # Multiple price levels - energy_price should be a list + if isinstance(energy_price, list): + for i, price in enumerate(energy_price): + if i < len(self.op.price_pairs[:, 1]): + self.op.price_pairs[i, 1] = price + else: + # Single price - apply to all levels + for i in range(len(self.op.price_pairs[:, 1])): + self.op.price_pairs[i, 1] = energy_price + else: + # Single price level + if isinstance(energy_price, list): + self.op.price_pairs[0, 1] = energy_price[0] + else: + self.op.price_pairs[0, 1] = energy_price + + if "parking_fee" in action: + self.op.parking_fee = action["parking_fee"] + + if "pricing_parameters" in action: + params = action["pricing_parameters"] + if len(params) > 0: + self.op.pricing_parameters[0] = params[0] # Fixed term + if len(params) > 1: + self.op.pricing_parameters[1] = params[1] # Rate-based term + + logger.info(f"Applied pricing decision: {action}") + + except Exception as e: + logger.error(f"Error using agent decision system for pricing: {e}") + logger.warning("Falling back to default pricing") + # Fallback to default pricing + pass + + # Reset reward at the end + self.op.charging_hub.grid.reset_reward() + + def update_pricing_agent(self) -> None: + self.op.update_vehicles_status() + + if not self.op.charging_agent: + 
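+            # Without a dedicated charging agent, recompute the hub-level profit here
+            # so the pricing agent is rewarded on it.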
self.op.charging_hub.reward["profit"] = self.op.reward_computing() + + agent = self.op.pricing_agent + if not agent: + return + + # Check if this is an RL agent with agent_name or a rule-based agent + if hasattr(agent, 'agent_name'): + # RL agent - use existing logic + agent_name = agent.agent_name + + if agent_name == "SAC": + agent.conduct_action(agent.action) + eval_ep = agent.do_evaluation_iterations + + if agent.time_for_critic_and_actor_to_learn() and not eval_ep: + for _ in range(agent.hyperparameters["learning_updates_per_learning_session"]): + agent.learn() + + mask = False if agent.global_step_number >= agent.environment.MAX_EPISODE_STEPS else agent.done + + agent.save_experience( + experience=( + agent.state, + agent.action, + agent.reward, + agent.next_state, + mask, + ) + ) + + elif agent_name == "DQN": + agent.conduct_action(agent.action) + + if agent.time_for_q_network_to_learn(): + for _ in range(agent.hyperparameters["learning_iterations"]): + agent.learn() + + agent.save_experience( + experience=( + agent.state, + agent.action, + agent.reward, + agent.next_state, + False, + ) + ) + + # Update global step number for RL agents + if hasattr(agent, 'global_step_number'): + agent.global_step_number += 1 + else: + # Rule-based or algorithm agent - no learning needed + logger.info(f"Updated {agent.__class__.__name__} (no learning required)") + + def get_current_pricing_data(self): + from dataclasses import dataclass + + @dataclass + class PricingData: + energy_price: float + parking_price: float + pricing_mode: str + price_history: pd.DataFrame + + params = getattr(self.op, "pricing_parameters", [0.0]) + return PricingData( + energy_price=params[0] if len(params) > 0 else 0.0, + parking_price=self.op.parking_fee, + pricing_mode=self.op.pricing_mode, + price_history=self.op.price_history, + ) + + def _get_pricing_decision_via_request(self, eval_ep: bool = False) -> Any: + """ + Get pricing decision through the decision request system. 
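When the SAC pricing agent saves an experience above, the last tuple element is a mask that appears to be forced to `False` once `MAX_EPISODE_STEPS` is reached, so a time-limit truncation is stored as non-terminal and the critic keeps bootstrapping from the next state. A minimal, self-contained sketch of that convention (the `td_target` helper and the discount value are illustrative, not part of this patch):

```python
# Illustrative only: shows why the stored mask matters for the bootstrap target.
GAMMA = 0.99  # assumed discount factor for the sketch


def td_target(reward: float, next_state_value: float, terminal: bool) -> float:
    """One-step target: zero the bootstrap term only on genuine termination."""
    return reward + (0.0 if terminal else GAMMA * next_state_value)


# Episode cut off by MAX_EPISODE_STEPS: done may be True, but the saved mask is False,
# so the transition is treated as non-terminal.
print(td_target(reward=1.0, next_state_value=5.0, terminal=False))  # 5.95
# Genuine terminal transition.
print(td_target(reward=1.0, next_state_value=5.0, terminal=True))   # 1.0
```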
+ + Args: + eval_ep: Whether this is an evaluation episode + + Returns: + The pricing action/decision + """ + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "pricing_mode": Configuration.instance().pricing_mode, + "agent_name": self.op.pricing_agent.agent_name, + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.PRICING, + state=self.op.pricing_agent.state, + context=context, + metadata={ + "pricing_mode": context["pricing_mode"], + "agent_name": context["agent_name"] + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + # Fallback to direct agent call if request system fails + return self.op.pricing_agent.pick_action(eval_ep) + + # Internal helpers (ported from Operator) + def _update_dynamic_price_history(self) -> None: + if self.op.pricing_mode == "Discrete": + self._add_discrete_price_to_history() + elif self.op.pricing_mode == "Continuous": + self._add_continuous_price_to_history() + + def _update_static_price_history(self) -> None: + if self.op.pricing_mode == "Discrete": + self._add_discrete_price_to_history() + elif self.op.pricing_mode in ["Continuous", "ToU"]: + self._add_continuous_price_to_history() + + def _update_pricing_parameters(self) -> None: + if self.op.pricing_mode == "ToU": + self._update_tou_pricing() + elif self.op.pricing_mode == "perfect_info": + self._update_perfect_info_pricing() + + def _update_tou_pricing(self) -> None: + hour = self._get_current_hour() + max_price = Configuration.instance().max_price_ToU + self.op.pricing_parameters[0] = ( + self.op.electricity_tariff[hour] / max(self.op.electricity_tariff) * max_price + ) + + def _update_perfect_info_pricing(self) -> None: + hour = self._get_current_hour() + config = Configuration.instance() + if config.dynamic_fix_term_pricing: + self.op.pricing_parameters[1] = self.op.price_schedules[1][hour] + self.op.pricing_parameters[0] = self.op.price_schedules[0][hour] + else: + self.op.pricing_parameters[1] = self.op.price_schedules[hour] + + def _get_current_hour(self) -> int: + return int((self.op.env.now % 1440) / 60) + + def _add_discrete_price_to_history(self) -> None: + self.op.price_history = pd.concat([ + self.op.price_history, + pd.DataFrame(self.op.price_pairs[:, 1]).transpose(), + ]) + + def _add_continuous_price_to_history(self) -> None: + self.op.price_history = pd.concat([ + self.op.price_history, + pd.DataFrame([ + self.op.pricing_parameters[0], + self.op.pricing_parameters[1] + ]).transpose(), + ]) + + diff --git a/simulation/operations/storage_service.py b/simulation/operations/storage_service.py new file mode 100644 index 0000000..d2ed782 --- /dev/null +++ b/simulation/operations/storage_service.py @@ -0,0 +1,159 @@ +from typing import List, Dict, Any, Optional +from simulation.operations.agents_controller import AgentsController +from simulation.config_facade import ConfigFacade +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + + +class StorageService: + """ + Service class for managing storage-related RL agent operations. + + Encapsulates all storage agent logic that was previously in the Operator class, + providing a clean separation of concerns and standardized interface for RL agents. 
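Both the pricing path above and the storage path below follow the same request/response round trip: build a context, `create_request(...)`, `process_request(request_id)`, use `response.action` if a response comes back, otherwise fall back to the agent's own `pick_action`. A usage sketch of that flow, using only the calls visible in this patch (the absolute module path is inferred from the relative import in `storage_service.py`; the helper function name is hypothetical):

```python
from simulation.operations.decision_request_system import DecisionType, decision_system


def request_pricing_action(operator, eval_ep: bool = False):
    """Round-trip through the decision request system, with the same fallback the service uses."""
    agent = operator.pricing_agent
    request_id = decision_system.create_request(
        agent_type=DecisionType.PRICING,
        state=agent.state,
        context={
            "eval_ep": eval_ep,
            "charging_hub": operator.charging_hub,
            "env": operator.env,
        },
        metadata={"agent_name": agent.agent_name},
    )
    response = decision_system.process_request(request_id)
    if response is not None:
        return response.action
    # Fall back to a direct agent call if the request system yields nothing.
    return agent.pick_action(eval_ep)
```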
+ """ + + def __init__(self, operator_instance: Any, agents_controller: Optional[AgentsController] = None, config_facade: Optional[ConfigFacade] = None): + """ + Initialize the StorageService. + + Args: + operator_instance: Reference to the main operator instance + agents_controller: Controller for managing RL agents + config_facade: Facade for accessing configuration values + """ + self.op = operator_instance + self.agents_controller = agents_controller + self.config = config_facade or ConfigFacade() + + # Register agents with the decision request system + auto_register_agents(operator_instance) + + def take_storage_action(self) -> None: + """ + Take storage action using the RL storage agent. + """ + if self.agents_controller and self.agents_controller.storage: + # Use the controller to get storage action + context = { + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + action_result = self.agents_controller.storage_step(context) + if action_result: + self.op.storage_agent.action = action_result.get("storage_action") + else: + # Use decision request system for storage decisions + action = self._get_storage_decision_via_request() + self.op.storage_agent.action = action + + def conduct_storage_action(self, given_storage_action: Optional[List[float]] = None) -> None: + """ + Execute the storage action by applying it to the electric storage system. + + Args: + given_storage_action: Optional storage action to use instead of agent's action + """ + if given_storage_action: + storage_power = given_storage_action[0] + else: + storage_power = self.op.storage_agent.action[0] + + if storage_power >= 0: + self.op.charging_hub.electric_storage.charge_yn = 1 + self.op.charging_hub.electric_storage.charging_power = storage_power + self.op.charging_hub.electric_storage.discharge_yn = 0 + self.op.charging_hub.electric_storage.discharging_power = 0 + elif storage_power < 0: + self.op.charging_hub.electric_storage.charge_yn = 0 + self.op.charging_hub.electric_storage.charging_power = 0 + self.op.charging_hub.electric_storage.discharge_yn = 1 + self.op.charging_hub.electric_storage.discharging_power = -storage_power + + self.op.check_storage(given_storage_action=given_storage_action) + + def update_storage_agent(self) -> None: + """ + Update the storage agent with new state and experience. + """ + eval_ep = self.op.storage_agent.do_evaluation_iterations + action = self.op.storage_agent.descale_action( + self.op.storage_agent.action, self.op.charging_hub + ) + self.op.storage_agent.conduct_action(action, self.op.charging_hub, self.op.env, eval_ep=eval_ep) + if self.op.storage_agent.time_for_critic_and_actor_to_learn(): + for _ in range( + self.op.storage_agent.hyperparameters[ + "learning_updates_per_learning_session" + ] + ): + self.op.storage_agent.learn() + mask = ( + False + if self.op.storage_agent.episode_step_number_val + >= self.op.storage_agent.environment.MAX_EPISODE_STEPS + else self.op.storage_agent.done + ) + # if not eval_ep: + + self.op.storage_agent.save_experience( + experience=( + self.op.storage_agent.state, + action, + self.op.storage_agent.reward, + self.op.storage_agent.next_state, + mask, + ) + ) + self.op.storage_agent.global_step_number += 1 + self.op.storage_agent.step_counter += 1 + + def _get_storage_decision_via_request(self) -> Any: + """ + Get storage decision through the decision request system. 
+ + Returns: + The storage action/decision + """ + # Get current state from environment + storage_state = self.op.charging_hub.storage_agent.environment.get_state( + self.op.charging_hub, self.op.env + ) + self.op.storage_agent.state = storage_state + + eval_ep = self.op.storage_agent.do_evaluation_iterations + self.op.storage_agent.episode_step_number_val = 0 + + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.STORAGE, + state=self.op.storage_agent.state, + context=context, + metadata={ + "agent_name": getattr(self.op.storage_agent, "agent_name", "Unknown") + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + # Fallback to direct agent call if request system fails + # Handle different pick_action signatures + import inspect + sig = inspect.signature(self.op.storage_agent.pick_action) + if len(sig.parameters) > 2: # Method expects eval_ep and charging_hub parameters + return self.op.storage_agent.pick_action(eval_ep, self.op.charging_hub) + elif len(sig.parameters) > 1: # Method expects only eval_ep parameter + return self.op.storage_agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + return self.op.storage_agent.pick_action() diff --git a/simulation/preferences/vehicle.py b/simulation/preferences/vehicle.py index f2e93d1..52f408f 100644 --- a/simulation/preferences/vehicle.py +++ b/simulation/preferences/vehicle.py @@ -46,7 +46,7 @@ def __init__( self.energy_requested = self.adjust_energy_request( energy_requested_input ) # energy_requested # kWh - self.raw_energy_demand = self.energy_requested.copy() + self.raw_energy_demand = self.energy_requested # No need for .copy() on float self.energy_requested = min(self.energy_requested, self.park_duration / 60 * 50) if pd.isna(self.energy_requested): self.energy_requested = 0 @@ -109,14 +109,33 @@ def adjust_energy_request(self, energy_requested_input): :return: """ + # Handle case where arrival_period is in the future or invalid + if self.arrival_period >= self.sim_time: + # Vehicle hasn't arrived yet, no energy request + return 0.0 + + # Handle case where departure_period is in the past + if self.departure_period <= 0: + # Vehicle has already departed, no energy request + return 0.0 + + # Handle case where park_duration is invalid + if self.park_duration <= 0: + return 0.0 + if self.departure_period <= self.sim_time: + # Vehicle will depart before simulation ends, use full energy request energy_request = energy_requested_input elif self.departure_period > self.sim_time: - energy_request = energy_requested_input * ( - (self.sim_time - self.arrival_period) / self.park_duration - ) + # Vehicle will stay beyond simulation end, prorate the energy request + time_in_simulation = max(0, self.sim_time - self.arrival_period) + energy_request = energy_requested_input * (time_in_simulation / self.park_duration) + else: + # Fallback case + energy_request = energy_requested_input - return energy_request + # Ensure energy request is non-negative + return max(0.0, energy_request) def set_average_power_requirement_level(self): laxity = self.energy_requested / (self.park_duration / 60) @@ -243,11 +262,4 @@ def update_status(self): self.remaining_park_duration = max(self.departure_period - self.env.now, 1) # self.remaining_laxity = 
self.remaining_energy_deficit/max(self.remaining_park_duration,1) # if self.mode in ['Connected']: - # print(f'id={self.id}, power={self.charging_power}') - - def reset_profit_reward(self): - self.profit_reward = 0 - - def calculate_profit_reward(self, energy_price, electricity_tariff): - # hour = int((self.env.now % 1440 - self.env.now % 60) / 60) - self.profit_reward += self.charging_power / 60 * (energy_price) + # print(f'id={self.id}, power={self.charging_power}') \ No newline at end of file diff --git a/utilities/agent_factory.py b/utilities/agent_factory.py new file mode 100644 index 0000000..7cd4d55 --- /dev/null +++ b/utilities/agent_factory.py @@ -0,0 +1,157 @@ +""" +Agent Factory Module + +This module handles the creation and configuration of different types of agents +for the EVCC simulation framework. +""" + +from typing import Optional +from resources.configuration.configuration import Configuration +from utilities.rl_agents.interfaces import DecisionType, AgentType +from utilities.rl_agents.rule_based_agents import ( + RuleBasedPricingAgent, RuleBasedChargingAgent, RuleBasedStorageAgent +) +from utilities.rl_agents.algorithm_agents import ( + AlgorithmChargingAgent, AlgorithmRoutingAgent, AlgorithmStorageAgent +) +from utilities.rl_agents.agents.actor_critic_agents.SAC import SAC +from resources.configuration.SAC_configuration import pricing_config +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +def is_agent_learnable(agent_type: str) -> bool: + """ + Determine if an agent type is learnable (RL agent). + + Args: + agent_type: String representation of agent type + + Returns: + True if the agent is learnable (RL agent), False otherwise + """ + learnable_types = ["RL_SAC", "RL_DQN", "RL_DDPG"] + return agent_type.upper() in learnable_types + + +def create_agent(decision_type: str, agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """ + Create an agent based on decision type and agent type. + + Args: + decision_type: Type of decision (pricing, charging, storage, routing) + agent_type: Type of agent (RL_SAC, RULE_BASED, HEURISTIC, etc.) 
+ algorithm: Algorithm name for heuristic agents + strategy: Strategy name for agents that support different strategies + + Returns: + Agent instance + + Raises: + ValueError: If decision type or agent type is not supported + """ + if decision_type == "pricing": + return _create_pricing_agent(agent_type, strategy) + elif decision_type == "charging": + return _create_charging_agent(agent_type, algorithm, strategy) + elif decision_type == "storage": + return _create_storage_agent(agent_type, algorithm, strategy) + elif decision_type == "routing": + return _create_routing_agent(agent_type, algorithm, strategy) + else: + raise ValueError(f"Unsupported decision type: {decision_type}") + + +def _create_pricing_agent(agent_type: str, strategy: Optional[str] = None): + """Create a pricing agent.""" + if agent_type == "RL_SAC": + return _create_sac_pricing_agent() + elif agent_type == "HEURISTIC": + strategy = strategy or "time_of_use" + return RuleBasedPricingAgent(strategy=strategy) + elif agent_type == "RULE_BASED": + return RuleBasedPricingAgent(strategy=strategy or "time_of_use") + else: + raise ValueError(f"Unsupported agent type for pricing: {agent_type}") + + +def _create_charging_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a charging agent.""" + if agent_type == "HEURISTIC": + return AlgorithmChargingAgent(algorithm=algorithm or "first_come_first_served") + elif agent_type == "RULE_BASED": + return RuleBasedChargingAgent(strategy=strategy or "first_come_first_served") + else: + raise ValueError(f"Unsupported agent type for charging: {agent_type}") + + +def _create_storage_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a storage agent.""" + if agent_type == "HEURISTIC": + return AlgorithmStorageAgent(algorithm=algorithm or "peak_shaving") + elif agent_type == "RULE_BASED": + return RuleBasedStorageAgent(strategy=strategy or "peak_shaving") + else: + raise ValueError(f"Unsupported agent type for storage: {agent_type}") + + +def _create_routing_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a routing agent.""" + if agent_type == "HEURISTIC": + return AlgorithmRoutingAgent(algorithm=algorithm or "lowest_occupancy_first") + elif agent_type == "RULE_BASED": + return AlgorithmRoutingAgent(algorithm=strategy or "lowest_occupancy_first") + else: + raise ValueError(f"Unsupported agent type for routing: {agent_type}") + + +def _create_sac_pricing_agent(): + """Create and configure a SAC pricing agent.""" + config = Configuration.instance() + + # Configure pricing environment for RL agent + pricing_config.number_chargers = config.facility_size + pricing_config.maximum_power = 50 + pricing_config.maximum_grid_usage = 2000 + pricing_config.number_power_options = len(config.energy_prices) + pricing_config.environment = PricingEnv(config=pricing_config, DQN=False) + pricing_config.learnt_network = config.evaluation_after_training + pricing_config.evaluation_after_training = config.evaluation_after_training + + return SAC(pricing_config) + + +def get_agent_configuration(config: Configuration) -> dict: + """ + Get agent configuration from the main configuration. 
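A short usage sketch for the factory defined above; the decision-type and agent-type strings are exactly the ones `create_agent()` dispatches on in this patch, and the chosen algorithms/strategies come from the agent classes added elsewhere in the diff:

```python
from utilities.agent_factory import create_agent, is_agent_learnable

tou_pricing = create_agent("pricing", "RULE_BASED", strategy="time_of_use")
edf_charging = create_agent("charging", "HEURISTIC", algorithm="earliest_deadline_first")
peak_storage = create_agent("storage", "RULE_BASED", strategy="peak_shaving")

# Only the RL variants are treated as learnable and routed through training.
assert is_agent_learnable("RL_SAC") and not is_agent_learnable("RULE_BASED")
```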
+ + Args: + config: Configuration instance + + Returns: + Dictionary containing agent configurations + """ + return { + "pricing": { + "agent_type": getattr(config, 'default_agent_types', {}).get("pricing"), + "strategy": getattr(config, 'default_strategies', {}).get("pricing", "time_of_use") + }, + "charging": { + "agent_type": getattr(config, 'default_agent_types', {}).get("charging"), + "algorithm": getattr(config, 'default_algorithms', {}).get("charging", "first_come_first_served"), + "strategy": getattr(config, 'default_strategies', {}).get("charging", "first_come_first_served") + }, + "storage": { + "agent_type": getattr(config, 'default_agent_types', {}).get("storage"), + "algorithm": getattr(config, 'default_algorithms', {}).get("storage", "peak_shaving"), + "strategy": getattr(config, 'default_strategies', {}).get("storage", "peak_shaving") + }, + "routing": { + "agent_type": getattr(config, 'default_agent_types', {}).get("routing"), + "algorithm": getattr(config, 'default_algorithms', {}).get("routing", "lowest_occupancy_first"), + "strategy": getattr(config, 'default_strategies', {}).get("routing", "lowest_occupancy_first") + } + } diff --git a/utilities/hyperparameter_tuner.py b/utilities/hyperparameter_tuner.py new file mode 100644 index 0000000..b429ddd --- /dev/null +++ b/utilities/hyperparameter_tuner.py @@ -0,0 +1,148 @@ +""" +Hyperparameter Tuner Module + +This module handles hyperparameter tuning for learnable agents in the EVCC simulation framework. +""" + +from typing import Dict, Any +from resources.configuration.configuration import Configuration +from utilities.training_manager import run_standard_training +import pandas as pd +import numpy as np + + +def find_best_parameters(agent, config: Configuration) -> None: + """ + Find best hyperparameters for the agent through grid search. 
+ + Args: + agent: The agent instance to tune + config: Configuration instance + """ + print("Starting hyperparameter tuning...") + + # Try to read existing training results + try: + training_results = pd.read_csv(f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}.csv') + except: + training_results = pd.DataFrame(columns=['learning_rate', 'batch_size', 'tau', 'result']) + + best_results = -10000000000 + best_parameters = {'learning_rate': 0, 'batch_size': 0, 'tau': 0} + + # Hyperparameter grid to search + learning_rates = [5e-5, 1e-4, 5e-4, 1e-3] + batch_sizes = [64, 256, 512] + tau_values = [0.05, 0.1] + + total_combinations = len(learning_rates) * len(batch_sizes) * len(tau_values) + current_combination = 0 + + for lr in learning_rates: + for bs in batch_sizes: + for tau in tau_values: + current_combination += 1 + print(f"Testing combination {current_combination}/{total_combinations}: lr={lr}, bs={bs}, tau={tau}") + + # Update agent hyperparameters + if hasattr(agent, 'hyperparameters'): + agent.hyperparameters['batch_size'] = bs + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['learning_rate'] = lr + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['learning_rate'] = lr + agent.hyperparameters['Critic']['tau'] = tau + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['tau'] = tau + agent.hyperparameters['min_steps_before_learning'] = max(bs, 256) + + # Run training experiment + try: + mean_reward = run_standard_training(agent, "pricing", config, return_rewards=True) + + # Track results + hyperparameters = {'learning_rate': lr, 'batch_size': bs, 'tau': tau} + if np.array(mean_reward).mean() > best_results: + best_results = np.array(mean_reward).mean() + best_parameters = hyperparameters + print(f"New best result: {best_results} with parameters: {best_parameters}") + + # Save results + results_dict = {'result': mean_reward} + new_row = pd.DataFrame([[lr, bs, tau, mean_reward]], columns=training_results.columns) + training_results = pd.concat([new_row, training_results], ignore_index=True) + + # Save to CSV + training_results.to_csv( + f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}_tuning.csv', + index=False + ) + + print(f'Parameters: {hyperparameters}, Results: {results_dict}') + print(f'Best so far: {best_results}, Best parameters: {best_parameters}') + + except Exception as e: + print(f"Error during hyperparameter tuning for {hyperparameters}: {e}") + continue + + print(f"\nHyperparameter tuning completed!") + print(f"Best result: {best_results}") + print(f"Best parameters: {best_parameters}") + + # Save final best parameters + best_params_df = pd.DataFrame([best_parameters]) + best_params_df.to_csv( + f'{config.OUTPUT_DATA_PATH}best_parameters_{agent.config.name}.csv', + index=False + ) + + +def get_hyperparameter_grid() -> Dict[str, list]: + """ + Get the default hyperparameter grid for tuning. + + Returns: + Dictionary containing hyperparameter grids + """ + return { + 'learning_rates': [5e-5, 1e-4, 5e-4, 1e-3], + 'batch_sizes': [64, 256, 512], + 'tau_values': [0.05, 0.1] + } + + +def update_agent_hyperparameters(agent, hyperparameters: Dict[str, Any]) -> None: + """ + Update agent hyperparameters safely. 
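`find_best_parameters` above is a plain grid search over the learning-rate, batch-size, and tau values returned by `get_hyperparameter_grid`. A compact, self-contained sketch of that loop; the `evaluate` function is a placeholder for `run_standard_training(..., return_rewards=True)` so the snippet runs without the simulator:

```python
import itertools

import numpy as np

grid = {
    "learning_rates": [5e-5, 1e-4, 5e-4, 1e-3],
    "batch_sizes": [64, 256, 512],
    "tau_values": [0.05, 0.1],
}


def evaluate(lr: float, bs: int, tau: float) -> float:
    """Stand-in for the training run; returns a deterministic pseudo-score."""
    rng = np.random.default_rng(hash((lr, bs, tau)) % 2**32)
    return float(rng.normal(loc=-lr * 1e4 + bs * 0.01, scale=1.0))


best_score, best_params = -np.inf, None
for lr, bs, tau in itertools.product(
    grid["learning_rates"], grid["batch_sizes"], grid["tau_values"]
):
    score = evaluate(lr, bs, tau)
    if score > best_score:
        best_score = score
        best_params = {"learning_rate": lr, "batch_size": bs, "tau": tau}

print(best_params, best_score)
```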
+ + Args: + agent: The agent instance to update + hyperparameters: Dictionary of hyperparameters to update + """ + if not hasattr(agent, 'hyperparameters'): + print("Warning: Agent does not have hyperparameters attribute") + return + + # Update batch size + if 'batch_size' in hyperparameters: + agent.hyperparameters['batch_size'] = hyperparameters['batch_size'] + + # Update learning rates + if 'learning_rate' in hyperparameters: + lr = hyperparameters['learning_rate'] + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['learning_rate'] = lr + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['learning_rate'] = lr + + # Update tau values + if 'tau' in hyperparameters: + tau = hyperparameters['tau'] + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['tau'] = tau + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['tau'] = tau + + # Update min steps before learning + if 'batch_size' in hyperparameters: + agent.hyperparameters['min_steps_before_learning'] = max(hyperparameters['batch_size'], 256) diff --git a/utilities/rl_agents/adapters/charging_adapter.py b/utilities/rl_agents/adapters/charging_adapter.py new file mode 100644 index 0000000..a1c1860 --- /dev/null +++ b/utilities/rl_agents/adapters/charging_adapter.py @@ -0,0 +1,80 @@ +from typing import Any, Dict, List, Optional +from utilities.rl_agents.interfaces import ChargingAgent + + +class ChargingEnvAgentAdapter(ChargingAgent): + """ + Adapter to wrap existing RL charging agents and environments to conform to the ChargingAgent interface. + + This adapter provides a standardized interface for charging agents, allowing easy swapping + of different RL algorithms while maintaining compatibility with the charging service. + """ + + def __init__(self, rl_agent: Any, charging_env: Any): + """ + Initialize the charging agent adapter. + + Args: + rl_agent: The underlying RL agent (e.g., SAC, DQN) + charging_env: The charging environment (e.g., ChargingHubInvestmentEnv) + """ + self.rl_agent = rl_agent + self.charging_env = charging_env + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state = self.charging_env.reset() + self.rl_agent.reset_game() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + charging_hub = context.get("charging_hub") + env = context.get("env") + self.current_state = self.charging_env.get_state(charging_hub, env) + self.rl_agent.state = self.current_state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select a charging action based on current state and vehicles. 
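A quick check of `update_agent_hyperparameters` against a stand-in agent object; the nested `'Actor'`/`'Critic'` layout matches the SAC-style hyperparameter dictionaries the tuner assumes, and the stand-in itself is hypothetical:

```python
from types import SimpleNamespace

from utilities.hyperparameter_tuner import update_agent_hyperparameters

agent = SimpleNamespace(
    hyperparameters={
        "batch_size": 64,
        "min_steps_before_learning": 256,
        "Actor": {"learning_rate": 3e-4, "tau": 0.05},
        "Critic": {"learning_rate": 3e-4, "tau": 0.05},
    }
)

update_agent_hyperparameters(agent, {"learning_rate": 1e-3, "batch_size": 512, "tau": 0.1})

assert agent.hyperparameters["Critic"]["learning_rate"] == 1e-3
assert agent.hyperparameters["Actor"]["tau"] == 0.1
assert agent.hyperparameters["min_steps_before_learning"] == 512  # max(batch_size, 256)
```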
+ + Args: + vehicles: List of vehicles to consider for charging + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected charging action + """ + charging_hub = context.get("charging_hub") + eval_ep = self.rl_agent.do_evaluation_iterations + self.rl_agent.episode_step_number_val = 0 + + # Get action from the RL agent + action_raw = self.rl_agent.pick_action(eval_ep) + self.current_action = action_raw + + # Rescale action if needed + rescaled_action = self.rl_agent.rescale_action(action_raw) + + return {"charging_action": rescaled_action} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # The RL agent's internal learn method is typically called + # by the agent itself after its conduct_action. + # If explicit learning is needed, it would be handled here. + pass diff --git a/utilities/rl_agents/adapters/gym_agent_adapter.py b/utilities/rl_agents/adapters/gym_agent_adapter.py new file mode 100644 index 0000000..8a70c9c --- /dev/null +++ b/utilities/rl_agents/adapters/gym_agent_adapter.py @@ -0,0 +1,180 @@ +from typing import Any, Dict, List, Optional, Union +import numpy as np +from utilities.rl_agents.interfaces import PricingAgent, ChargingAgent, StorageAgent +from utilities.rl_environments.evch_gym_env import EVCHGymEnv, AgentType + + +class GymAgentAdapter: + """ + Adapter for standard gym-compatible RL agents to work with EVCH simulation. + + This adapter allows any gym-compatible RL agent (Stable Baselines3, RLlib, etc.) + to be used with the EVCH simulation by providing a standardized interface. + """ + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + """ + Initialize the gym agent adapter. + + Args: + gym_env: The EVCH gym environment + gym_agent: The gym-compatible RL agent (must have predict() method) + """ + self.gym_env = gym_env + self.gym_agent = gym_agent + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + # Validate that the agent has the required methods + if not hasattr(self.gym_agent, 'predict'): + raise ValueError("Gym agent must have a 'predict' method") + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state, _ = self.gym_env.reset() + if hasattr(self.gym_agent, 'reset'): + self.gym_agent.reset() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + # Set the simulation context in the gym environment + charging_hub = context.get("charging_hub") + sim_env = context.get("env") + if charging_hub and sim_env: + self.gym_env.set_simulation_context(charging_hub, sim_env) + + # Get current state from the gym environment + self.current_state = self.gym_env._get_state() + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select an action using the gym agent. 
+ + Args: + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected action + """ + # Update state first + self.update_state(context) + + # Use the gym agent to predict action + if hasattr(self.gym_agent, 'predict'): + # Standard gym agent interface + action, _ = self.gym_agent.predict(self.current_state, deterministic=True) + elif hasattr(self.gym_agent, 'act'): + # Alternative interface + action = self.gym_agent.act(self.current_state) + else: + # Fallback: assume agent is callable + action = self.gym_agent(self.current_state) + + self.current_action = action + + # Return action in the format expected by the service + if self.gym_env.agent_type == AgentType.PRICING: + return {"pricing_parameters": action} + elif self.gym_env.agent_type == AgentType.CHARGING: + return {"charging_action": action} + elif self.gym_env.agent_type == AgentType.STORAGE: + return {"storage_action": action} + else: + return {"action": action} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # Most gym agents handle learning internally during training + # This method is called for compatibility but may not be used + pass + + def train(self, total_timesteps: int = 1000) -> None: + """ + Train the gym agent. + + Args: + total_timesteps: Number of timesteps to train for + """ + if hasattr(self.gym_agent, 'learn'): + self.gym_agent.learn(total_timesteps=total_timesteps) + else: + raise NotImplementedError("Gym agent does not have a 'learn' method") + + +class GymPricingAgentAdapter(GymAgentAdapter, PricingAgent): + """Adapter for gym agents used as pricing agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.PRICING: + raise ValueError("Gym environment must be configured for pricing agent") + + +class GymChargingAgentAdapter(GymAgentAdapter, ChargingAgent): + """Adapter for gym agents used as charging agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.CHARGING: + raise ValueError("Gym environment must be configured for charging agent") + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on vehicles and context. 
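`GymAgentAdapter.select_action` above accepts any policy object that exposes a Stable-Baselines3-style `predict()`, an `act()`, or is simply callable. A standalone reproduction of that duck-typed dispatch with two stub policies (the stub class names are hypothetical):

```python
import numpy as np


class SB3StylePolicy:
    """Stub mimicking the (action, state) return shape of an SB3 predict()."""

    def predict(self, obs, deterministic=True):
        return np.zeros(2), None


class CallablePolicy:
    def __call__(self, obs):
        return np.ones(2)


def pick_action(policy, obs):
    # Same precedence the adapter uses: predict() first, then act(), then __call__.
    if hasattr(policy, "predict"):
        action, _ = policy.predict(obs, deterministic=True)
    elif hasattr(policy, "act"):
        action = policy.act(obs)
    else:
        action = policy(obs)
    return action


obs = np.zeros(4)
print(pick_action(SB3StylePolicy(), obs))  # [0. 0.]
print(pick_action(CallablePolicy(), obs))  # [1. 1.]
```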
+ + Args: + vehicles: List of vehicles to consider for charging + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected charging action + """ + # Add vehicles to context for the gym environment + context_with_vehicles = context.copy() + context_with_vehicles["vehicles"] = vehicles + + return super().select_action(context_with_vehicles) + + +class GymStorageAgentAdapter(GymAgentAdapter, StorageAgent): + """Adapter for gym agents used as storage agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.STORAGE: + raise ValueError("Gym environment must be configured for storage agent") + + +# Factory functions for easy adapter creation +def create_gym_pricing_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymPricingAgentAdapter: + """Create a gym pricing agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_pricing_env + gym_env = make_pricing_env(config_dict, **kwargs) + return GymPricingAgentAdapter(gym_env, gym_agent) + + +def create_gym_charging_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymChargingAgentAdapter: + """Create a gym charging agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_charging_env + gym_env = make_charging_env(config_dict, **kwargs) + return GymChargingAgentAdapter(gym_env, gym_agent) + + +def create_gym_storage_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymStorageAgentAdapter: + """Create a gym storage agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_storage_env + gym_env = make_storage_env(config_dict, **kwargs) + return GymStorageAgentAdapter(gym_env, gym_agent) diff --git a/utilities/rl_agents/adapters/pricing_adapter.py b/utilities/rl_agents/adapters/pricing_adapter.py new file mode 100644 index 0000000..381d4ec --- /dev/null +++ b/utilities/rl_agents/adapters/pricing_adapter.py @@ -0,0 +1,42 @@ +from typing import Any, Dict + +from utilities.rl_agents.interfaces import PricingAgent as PricingAgentInterface +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +class PricingEnvAgentAdapter(PricingAgentInterface): + """ + Adapter that wires an underlying RL policy to the PricingEnv API. 
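The gym adapters above are intended to let off-the-shelf policies drive the simulation. A sketch of one possible wiring with Stable-Baselines3; it assumes `stable-baselines3` is installed and that `make_pricing_env()` accepts a plain config dict, neither of which is fixed by this patch, and the empty `config_dict` is a placeholder:

```python
from stable_baselines3 import SAC as SB3SAC

from utilities.rl_agents.adapters.gym_agent_adapter import GymPricingAgentAdapter
from utilities.rl_environments.evch_gym_env import make_pricing_env

config_dict = {}  # placeholder configuration
gym_env = make_pricing_env(config_dict)
sb3_agent = SB3SAC("MlpPolicy", gym_env)

# Agent and adapter share the same environment instance.
adapter = GymPricingAgentAdapter(gym_env, sb3_agent)
adapter.train(total_timesteps=10_000)  # delegates to sb3_agent.learn(...)
```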
+ The underlying policy must implement: reset(), act(obs) -> action, learn(transition) + """ + + def __init__(self, env: PricingEnv, policy: Any): + self.env = env + self.policy = policy + self._last_obs = None + + def reset(self) -> None: + if hasattr(self.policy, "reset"): + self.policy.reset() + self._last_obs = self.env.reset() + + def update_state(self, context: Dict[str, Any]) -> None: + charging_hub = context.get("charging_hub") + sim_env = context.get("env") + self._last_obs = self.env.get_state(charging_hub, sim_env) + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + if self._last_obs is None: + self.update_state(context) + if hasattr(self.policy, "act"): + action = self.policy.act(self._last_obs) + else: + # Fallback: assume policy is callable + action = self.policy(self._last_obs) + return {"action": action} + + def learn(self, transition: Dict[str, Any]) -> None: + if hasattr(self.policy, "learn"): + self.policy.learn(transition) + + diff --git a/utilities/rl_agents/adapters/storage_adapter.py b/utilities/rl_agents/adapters/storage_adapter.py new file mode 100644 index 0000000..0dd5266 --- /dev/null +++ b/utilities/rl_agents/adapters/storage_adapter.py @@ -0,0 +1,76 @@ +from typing import Any, Dict, List, Optional +from utilities.rl_agents.interfaces import StorageAgent + + +class StorageEnvAgentAdapter(StorageAgent): + """ + Adapter to wrap existing RL storage agents and environments to conform to the StorageAgent interface. + + This adapter provides a standardized interface for storage agents, allowing easy swapping + of different RL algorithms while maintaining compatibility with the storage service. + """ + + def __init__(self, rl_agent: Any, storage_env: Any): + """ + Initialize the storage agent adapter. + + Args: + rl_agent: The underlying RL agent (e.g., SAC, DQN) + storage_env: The storage environment (e.g., StorageEnv) + """ + self.rl_agent = rl_agent + self.storage_env = storage_env + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state = self.storage_env.reset() + self.rl_agent.reset_game() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + charging_hub = context.get("charging_hub") + env = context.get("env") + self.current_state = self.storage_env.get_state(charging_hub, env) + self.rl_agent.state = self.current_state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select a storage action based on current state. + + Args: + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected storage action + """ + charging_hub = context.get("charging_hub") + eval_ep = self.rl_agent.do_evaluation_iterations + self.rl_agent.episode_step_number_val = 0 + + # Get action from the RL agent + action_raw = self.rl_agent.pick_action(eval_ep) + self.current_action = action_raw + + return {"storage_action": action_raw} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # The RL agent's internal learn method is typically called + # by the agent itself after its conduct_action. + # If explicit learning is needed, it would be handled here. 
+ pass diff --git a/utilities/rl_agents/agents/Base_Agent.py b/utilities/rl_agents/agents/Base_Agent.py index de5a58b..faaff8b 100644 --- a/utilities/rl_agents/agents/Base_Agent.py +++ b/utilities/rl_agents/agents/Base_Agent.py @@ -256,11 +256,9 @@ def run_n_episodes( # if self.config.save_model: self.locally_save_policy() return self.game_full_episode_scores, self.rolling_results, time_taken - def conduct_action(self, action, charging_hub, env): + def conduct_action(self, action): """Conducts an action in the environment""" - self.next_state, self.reward, self.done, _ = self.environment.step( - action, charging_hub, env - ) + self.next_state, self.reward, self.done, _ = self.environment.step(action) self.total_episode_score_so_far += self.reward if self.hyperparameters["clip_rewards"]: self.reward = max(min(self.reward, 1.0), -1.0) diff --git a/utilities/rl_agents/agents/actor_critic_agents/SAC.py b/utilities/rl_agents/agents/actor_critic_agents/SAC.py index 226247f..6e69333 100644 --- a/utilities/rl_agents/agents/actor_critic_agents/SAC.py +++ b/utilities/rl_agents/agents/actor_critic_agents/SAC.py @@ -229,8 +229,7 @@ def reset_game(self): if self.add_extra_noise: self.noise.reset() - def step(self, charging_hub, env): - pass + def step(self): """Runs an episode on the game, saving the experience and running a learning step if appropriate""" eval_ep = ( self.episode_number % TRAINING_EPISODES_PER_EVAL_EPISODE == 0 @@ -238,8 +237,8 @@ def step(self, charging_hub, env): ) self.episode_step_number_val = 0 # while not self.done: - self.action = self.pick_action(eval_ep, charging_hub) - self.conduct_action(self.action, charging_hub, env) + self.action = self.pick_action(eval_ep) + self.conduct_action(self.action) if self.time_for_critic_and_actor_to_learn(): for _ in range( self.hyperparameters["learning_updates_per_learning_session"] @@ -257,110 +256,22 @@ def step(self, charging_hub, env): self.state = self.next_state self.global_step_number += 1 - # print(self.pick_action(eval_ep, charging_hub, state=self.environment.get_state(None, None))) - def rescale_action(self, action): return ( action * (self.action_range[1] - self.action_range[0]) / 2.0 + (self.action_range[1] + self.action_range[0]) / 2.0 ) - def descale_action(self, action, charging_hub): + def descale_action(self, action): actions = (action - ((self.action_range[1] + self.action_range[0]) / 2.0)) / ( (self.action_range[1] - self.action_range[0]) / 2 ) return actions - def penalty_action(self, action, charging_hub): - vehicle_state = self.state[24 + 5 + 5 :] - ### check charging action - total_usage = np.array([]) - i = 0 - for charger in charging_hub.chargers: - associated_power = np.array([]) - for j in range(charger.number_of_connectors): - maximum_power = charger.power - if vehicle_state[i * 3] <= 0: - charging_hub.reward["feasibility"] += action[i + 1] - else: - associated_power = np.append(associated_power, action[i + 1]) - total_usage = np.append(total_usage, action[i + 1]) - i += 1 - surplus_per_charger = max(associated_power.sum() - maximum_power, 0) - charging_hub.reward["feasibility"] += surplus_per_charger - total_surplus = max( - total_usage.sum() - charging_hub.operator.free_grid_capa_actual[0], 0 - ) - charging_hub.reward["feasibility"] += total_surplus - - def checked_action(self, action, charging_hub): - vehicle_state = self.state[24 + 5 + 5 :] - ### check charging action - i = 0 - for charger in charging_hub.chargers: - lower_bound = i + 1 - for j in range(charger.number_of_connectors): - maximum_power = 
charger.power - if vehicle_state[i * 3] <= 0: - action[i + 1] = 0 - i += 1 - upper_bound = i + 1 - - while action[lower_bound:upper_bound].sum() > maximum_power: - number_active_chargers = len( - [f for f in action[lower_bound:upper_bound] if f > 0] - ) - surplus_per_charger = ( - max(action[lower_bound:upper_bound].sum() - maximum_power, 0) - / number_active_chargers - ) - action[lower_bound:upper_bound] -= surplus_per_charger - for c in range(len(action[lower_bound:upper_bound])): - action[lower_bound:upper_bound][c] = max( - action[lower_bound:upper_bound][c], 0 - ) - - storage_object = charging_hub.electric_storage - storage_object.SoC = min( - storage_object.SoC, storage_object.max_energy_stored_kWh - ) - storage_object.SoC = max(storage_object.SoC, 0) - if action[0] >= 0: - if ( - storage_object.SoC + action[0] / 60 * charging_hub.planning_interval - > storage_object.max_energy_stored_kWh - ): - action[0] = ( - storage_object.max_energy_stored_kWh - storage_object.SoC - ) / (60 * charging_hub.planning_interval) - action[0] = min(action[0], charging_hub.operator.free_grid_capa_actual[0]) - - # discharge rate cannot exceed SoC, and hub demand (i.e., no infeed) - if action[0] < 0: - if storage_object.SoC <= 0: - action[0] = 0 - elif ( - storage_object.SoC + (action[0] / 60 * charging_hub.planning_interval) - < 0 - ): - action[0] = -max( - (storage_object.SoC) / (60 * charging_hub.planning_interval), 0 - ) - while action.sum() - charging_hub.operator.free_grid_capa_actual[0] > 0: - number_active_chargers = len([a for a in action if a > 0]) - surplus_per_charger = ( - max(action.sum() - charging_hub.operator.free_grid_capa_actual[0], 0) - / number_active_chargers - ) - for i in range(1, len(action)): - action[i] = max(action[i] - surplus_per_charger, 0) - # if action[0]>0: - # action[0] = max(action[0] - surplus_per_charger, 0) - return action - def pick_action(self, eval_ep, charging_hub=None, state=None): + def pick_action(self, eval_ep, state=None): """Picks an action using one of three methods: 1) Randomly if we haven't passed a certain number of steps, 2) Using the actor in evaluation mode if eval_ep is True 3) Using the actor in training mode if eval_ep is False. 
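With the `charging_hub` argument dropped, `pick_action` now returns a raw action clipped to [-1, 1], and `rescale_action` / `descale_action` are plain affine maps between that range and the environment's action bounds (warm-up samples from the action space are descaled before being stored). A standalone round trip of those two maps; the price band used for `action_range` is illustrative only:

```python
import numpy as np

lo, hi = 0.10, 0.60  # illustrative action range, e.g. a price band in EUR/kWh


def rescale(a):
    """Map a policy output in [-1, 1] onto [lo, hi] (same affine form as SAC.rescale_action)."""
    return a * (hi - lo) / 2.0 + (hi + lo) / 2.0


def descale(a):
    """Inverse map, as applied to random warm-up samples before learning starts."""
    return (a - (hi + lo) / 2.0) / ((hi - lo) / 2.0)


raw = np.array([-1.0, 0.0, 1.0])
print(rescale(raw))           # [0.1  0.35 0.6 ]
print(descale(rescale(raw)))  # [-1.  0.  1.]
```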
The difference between evaluation and training mode is that training mode does more exploration @@ -376,13 +287,13 @@ def pick_action(self, eval_ep, charging_hub=None, state=None): # action[i] = max(action[i], -1) # action[i] = min(action[i], 1) # action = self.rescale_action(action) - # action = self.checked_action(action, charging_hub) + # action = self.checked_action(action) elif ( self.global_step_number < self.hyperparameters["min_steps_before_learning"] ): action = self.environment.action_space.sample().astype("float64") - action = self.descale_action(action, charging_hub) - # action = self.checked_action(action, charging_hub) + action = self.descale_action(action) + # action = self.checked_action(action) else: action = self.actor_pick_action(state=state) if self.add_extra_noise: @@ -392,7 +303,7 @@ def pick_action(self, eval_ep, charging_hub=None, state=None): action[i] = max(action[i], -1) action[i] = min(action[i], 1) # action = self.rescale_action(action) - # action = self.checked_action(action, charging_hub) + # action = self.checked_action(action) return action def actor_pick_action(self, state=None, eval=False): diff --git a/utilities/rl_agents/algorithm_agents.py b/utilities/rl_agents/algorithm_agents.py new file mode 100644 index 0000000..17155d7 --- /dev/null +++ b/utilities/rl_agents/algorithm_agents.py @@ -0,0 +1,682 @@ +from typing import Any, Dict, List, Optional +import numpy as np +from datetime import datetime + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent +) + +# Import existing algorithms +from simulation.operations.ChargingAlgorithms import ( + uncontrolled as charging_uncontrolled, + first_come_first_served, + earliest_deadline_first, + least_laxity_first, + equal_sharing, + online_myopic, + online_multi_period, + integrated_charging_storage +) + +from simulation.operations.RoutingAlgorithms import ( + random_charger_assignment, + lowest_occupancy_first_charger_assignment, + fill_one_after_other_charger_assignment, + lowest_utilization_first_charger_assignment, + matching_supply_demand_level, + assign_to_the_minimum_power +) + +from simulation.operations.StorageAlgorithms import ( + uncontrolled as storage_uncontrolled, + temporal_arbitrage, + peak_shaving +) + +from simulation.operations.IntegratedAlgorithms import ( + perfect_info_charging_routing, + perfect_info_charging_routing_storage +) + + +class AlgorithmChargingAgent(ChargingAgent): + """ + Agent that wraps existing charging algorithms. + + This agent provides a standardized interface to all the existing + charging algorithms in the codebase. 
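The algorithm agents defined below all share one pattern: the configured algorithm name is looked up in a dict of bound methods and unknown names raise a `ValueError`. A stripped-down, self-contained version of that dispatch (the class and its placeholder strategies are hypothetical; the real agents call into `ChargingAlgorithms` instead):

```python
from typing import Any, Callable, Dict, List


class MiniChargingDispatcher:
    """Stripped-down name -> method dispatch, mirroring the agents below."""

    def __init__(self, algorithm: str = "first_come_first_served"):
        self.algorithm = algorithm
        self.algorithm_functions: Dict[str, Callable[[List[Any]], List[float]]] = {
            "uncontrolled": self._uncontrolled,
            "first_come_first_served": self._fcfs,
        }

    def select_action(self, vehicles: List[Any]) -> List[float]:
        if self.algorithm not in self.algorithm_functions:
            raise ValueError(f"Unknown charging algorithm: {self.algorithm}")
        return self.algorithm_functions[self.algorithm](vehicles)

    # Placeholder strategies for illustration only.
    def _uncontrolled(self, vehicles):
        return [50.0 for _ in vehicles]

    def _fcfs(self, vehicles):
        return [50.0 if i == 0 else 0.0 for i, _ in enumerate(vehicles)]


print(MiniChargingDispatcher("first_come_first_served").select_action(["ev1", "ev2"]))
# [50.0, 0.0]
```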
+ """ + + def __init__(self, algorithm: str = "first_come_first_served"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.CHARGING + + # Algorithm mapping + self.algorithm_functions = { + "uncontrolled": self._uncontrolled_charging, + "first_come_first_served": self._first_come_first_served, + "earliest_deadline_first": self._earliest_deadline_first, + "least_laxity_first": self._least_laxity_first, + "equal_sharing": self._equal_sharing, + "online_myopic": self._online_myopic, + "online_multi_period": self._online_multi_period, + "integrated_storage": self._integrated_storage, + "perfect_info": self._perfect_info, + "perfect_info_with_storage": self._perfect_info_with_storage + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action using the specified algorithm. + + Args: + vehicles: List of vehicles requiring charging + context: Dictionary containing charging context + + Returns: + Dictionary containing charging decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown charging algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm + charging_actions = algo_func(vehicles, context) + + return { + "charging_actions": charging_actions, + "power_allocation": self.algorithm, + "priority_order": list(range(len(vehicles))), + "confidence": 0.9, + "strategy": self.algorithm, + "reasoning": f"Applied {self.algorithm} charging algorithm to {len(vehicles)} vehicles" + } + + def _uncontrolled_charging(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Uncontrolled charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + charging_uncontrolled( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _first_come_first_served(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """First come first served charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + first_come_first_served( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging 
actions + return [vehicle.charging_power for vehicle in vehicles] + + def _earliest_deadline_first(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Earliest deadline first charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + earliest_deadline_first( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _least_laxity_first(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Least laxity first charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + least_laxity_first( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _equal_sharing(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Equal sharing charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + equal_sharing( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _online_myopic(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Online myopic charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + online_myopic( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _online_multi_period(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Online multi-period charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + electricity_cost = 
context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + online_multi_period( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _integrated_storage(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Integrated charging and storage algorithm.""" + storage = context.get("storage") + charging_stations = context.get("charging_stations", []) + env = context.get("env") + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + integrated_charging_storage( + storage=storage, + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _perfect_info(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Perfect information charging and routing algorithm.""" + charging_stations = context.get("charging_stations", []) + env = context.get("env") + grid_capacity = context.get("grid_capacity", 500) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + baseload = context.get("baseload", 100) + generation = context.get("generation") + service_level = context.get("service_level", 1) + time_range = context.get("time_range", 24) + + # Execute algorithm + perfect_info_charging_routing( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + grid_capacity=grid_capacity, + electricity_cost=electricity_cost, + sim_time=sim_time, + baseload=baseload, + generation=generation, + service_level=service_level, + time_range=time_range + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _perfect_info_with_storage(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Perfect information charging, routing, and storage algorithm.""" + charging_stations = context.get("charging_stations", []) + env = context.get("env") + grid_capacity = context.get("grid_capacity", 500) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + baseload = context.get("baseload", 100) + storage = context.get("storage") + service_level = context.get("service_level", 1) + time_range = context.get("time_range", 24 * 5) + + # Execute algorithm + perfect_info_charging_routing_storage( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + grid_capacity=grid_capacity, + electricity_cost=electricity_cost, + sim_time=sim_time, + baseload=baseload, + storage=storage, 
+ service_level=service_level, + time_range=time_range + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass + + +class AlgorithmRoutingAgent(RoutingAgent): + """ + Agent that wraps existing routing algorithms. + + This agent provides a standardized interface to all the existing + routing algorithms in the codebase. + """ + + def __init__(self, algorithm: str = "lowest_occupancy_first"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.ROUTING + + # Algorithm mapping + self.algorithm_functions = { + "random": self._random_routing, + "lowest_occupancy_first": self._lowest_occupancy_first, + "fill_one_after_other": self._fill_one_after_other, + "lowest_utilization_first": self._lowest_utilization_first, + "matching_supply_demand": self._matching_supply_demand, + "minimum_power_requirement": self._minimum_power_requirement + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select routing action using the specified algorithm. + + Args: + vehicles: List of vehicles requiring routing + context: Dictionary containing routing context + + Returns: + Dictionary containing routing decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown routing algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm for each vehicle + routing_assignments = [] + for vehicle in vehicles: + assignment = algo_func(vehicle, context) + routing_assignments.append(assignment) + + return { + "routing_assignments": routing_assignments, + "queue_order": list(range(len(vehicles))), + "wait_times": [0] * len(vehicles), # Placeholder + "confidence": 0.9, + "strategy": self.algorithm, + "reasoning": f"Applied {self.algorithm} routing algorithm to {len(vehicles)} vehicles" + } + + def _random_routing(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Random routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return random_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _lowest_occupancy_first(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Lowest occupancy first routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return lowest_occupancy_first_charger_assignment( + 
charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _fill_one_after_other(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Fill one after other routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 24 * 60) + + return fill_one_after_other_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _lowest_utilization_first(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Lowest utilization first routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return lowest_utilization_first_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _matching_supply_demand(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Matching supply demand routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return matching_supply_demand_level( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _minimum_power_requirement(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Minimum power requirement routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return assign_to_the_minimum_power( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass + + +class AlgorithmStorageAgent(StorageAgent): + """ + Agent that wraps existing storage algorithms. + + This agent provides a standardized interface to all the existing + storage algorithms in the codebase. 
+ """ + + def __init__(self, algorithm: str = "peak_shaving"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.STORAGE + + # Algorithm mapping + self.algorithm_functions = { + "uncontrolled": self._uncontrolled_storage, + "temporal_arbitrage": self._temporal_arbitrage, + "peak_shaving": self._peak_shaving + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action using the specified algorithm. + + Args: + context: Dictionary containing storage context + + Returns: + Dictionary containing storage decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown storage algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm + storage_action = algo_func(context) + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": self.algorithm, + "confidence": 0.9, + "reasoning": f"Applied {self.algorithm} storage algorithm" + } + + def _uncontrolled_storage(self, context: Dict[str, Any]) -> float: + """Uncontrolled storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + + # Execute algorithm + storage_uncontrolled(env=env, storage_object=storage_object) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def _temporal_arbitrage(self, context: Dict[str, Any]) -> float: + """Temporal arbitrage storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + planning_interval = context.get("planning_interval", 15) + electricity_tariff = context.get("electricity_tariff", [0.15] * 24) + free_grid_capacity = context.get("free_grid_capacity", 500) + ev_charging_load = context.get("ev_charging_load", 100) + + # Execute algorithm + temporal_arbitrage( + env=env, + storage_object=storage_object, + planning_interval=planning_interval, + electricity_tariff=electricity_tariff, + free_grid_capacity=free_grid_capacity, + ev_charging_load=ev_charging_load + ) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def _peak_shaving(self, context: Dict[str, Any]) -> float: + """Peak shaving storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + planning_interval = context.get("planning_interval", 15) + electricity_tariff = context.get("electricity_tariff", [0.15] * 24) + free_grid_capacity = context.get("free_grid_capacity", 500) + ev_charging_load = context.get("ev_charging_load", 100) + + # Execute algorithm + peak_shaving( + env=env, + storage_object=storage_object, + planning_interval=planning_interval, + electricity_tariff=electricity_tariff, + free_grid_capacity=free_grid_capacity, + 
ev_charging_load=ev_charging_load + ) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass diff --git a/utilities/rl_agents/factory.py b/utilities/rl_agents/factory.py new file mode 100644 index 0000000..4de1788 --- /dev/null +++ b/utilities/rl_agents/factory.py @@ -0,0 +1,196 @@ +from typing import Any, Optional +from simulation.operations.agents_controller import AgentsController +from utilities.rl_agents.adapters.pricing_adapter import PricingEnvAgentAdapter +from utilities.rl_agents.adapters.charging_adapter import ChargingEnvAgentAdapter +from utilities.rl_agents.adapters.storage_adapter import StorageEnvAgentAdapter +from utilities.rl_agents.adapters.gym_agent_adapter import ( + GymPricingAgentAdapter, GymChargingAgentAdapter, GymStorageAgentAdapter, + create_gym_pricing_adapter, create_gym_charging_adapter, create_gym_storage_adapter +) +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +def build_pricing_adapter(config: Any, policy: Any, dqn: bool = False) -> PricingEnvAgentAdapter: + """ + Build a PricingEnv and wrap the given policy with the adapter. + + Args: + config: pricing env configuration object (must have number_power_options, maximum_power, evaluation) + policy: underlying RL policy with act(obs) and optional reset()/learn() + dqn: whether to initialize env in DQN discrete mode + + Returns: + PricingEnvAgentAdapter instance + """ + env = PricingEnv(config, DQN=dqn, charging_hub=None, env=None) + return PricingEnvAgentAdapter(env=env, policy=policy) + + +def build_charging_adapter(config: Any, policy: Any) -> ChargingEnvAgentAdapter: + """ + Build a charging environment and wrap the given policy with the adapter. + + Args: + config: charging env configuration object + policy: underlying RL policy with act(obs) and optional reset()/learn() + + Returns: + ChargingEnvAgentAdapter instance + """ + from utilities.rl_environments.SC_env import ChargingHubInvestmentEnv + env = ChargingHubInvestmentEnv(config, charging_hub=None, env=None) + return ChargingEnvAgentAdapter(rl_agent=policy, charging_env=env) + + +def build_storage_adapter(config: Any, policy: Any) -> StorageEnvAgentAdapter: + """ + Build a storage environment and wrap the given policy with the adapter. + + Args: + config: storage env configuration object + policy: underlying RL policy with act(obs) and optional reset()/learn() + + Returns: + StorageEnvAgentAdapter instance + """ + from utilities.rl_environments.SC_storage_env import StorageEnv + env = StorageEnv(config, charging_hub=None, env=None) + return StorageEnvAgentAdapter(rl_agent=policy, storage_env=env) + + +def build_agents_controller( + pricing_agent: Optional[Any] = None, + charging_agent: Optional[Any] = None, + storage_agent: Optional[Any] = None, + pricing_env: Optional[PricingEnv] = None, + pricing_config: Optional[Any] = None, + charging_config: Optional[Any] = None, + storage_config: Optional[Any] = None +) -> AgentsController: + """ + Build an AgentsController with the specified agents. 
+ + Args: + pricing_agent: RL agent for pricing (e.g., SAC, DQN) + charging_agent: RL agent for charging (e.g., SAC, DQN) + storage_agent: RL agent for storage (e.g., SAC, DQN) + pricing_env: Pricing environment (optional, will be created if not provided) + pricing_config: Configuration for pricing environment + charging_config: Configuration for charging environment + storage_config: Configuration for storage environment + + Returns: + AgentsController instance + """ + pricing_adapter = None + charging_adapter = None + storage_adapter = None + + if pricing_agent and pricing_config: + if not pricing_env: + pricing_env = PricingEnv(pricing_config, charging_hub=None, env=None) + pricing_adapter = PricingEnvAgentAdapter(rl_agent=pricing_agent, pricing_env=pricing_env) + + if charging_agent and charging_config: + charging_adapter = build_charging_adapter(charging_config, charging_agent) + + if storage_agent and storage_config: + storage_adapter = build_storage_adapter(storage_config, storage_agent) + + return AgentsController( + pricing=pricing_adapter, + charging=charging_adapter, + storage=storage_adapter + ) + + +# Gym-compatible agent factory functions +def build_gym_pricing_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymPricingAgentAdapter: + """ + Build a gym-compatible pricing agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymPricingAgentAdapter instance + """ + return create_gym_pricing_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_charging_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymChargingAgentAdapter: + """ + Build a gym-compatible charging agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymChargingAgentAdapter instance + """ + return create_gym_charging_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_storage_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymStorageAgentAdapter: + """ + Build a gym-compatible storage agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymStorageAgentAdapter instance + """ + return create_gym_storage_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_agents_controller( + pricing_agent: Optional[Any] = None, + charging_agent: Optional[Any] = None, + storage_agent: Optional[Any] = None, + pricing_config: Optional[Dict[str, Any]] = None, + charging_config: Optional[Dict[str, Any]] = None, + storage_config: Optional[Dict[str, Any]] = None, + **kwargs +) -> AgentsController: + """ + Build an AgentsController with gym-compatible agents. 
+ + Args: + pricing_agent: Gym-compatible RL agent for pricing + charging_agent: Gym-compatible RL agent for charging + storage_agent: Gym-compatible RL agent for storage + pricing_config: Configuration for pricing environment + charging_config: Configuration for charging environment + storage_config: Configuration for storage environment + **kwargs: Additional arguments for environment creation + + Returns: + AgentsController instance with gym-compatible agents + """ + pricing_adapter = None + charging_adapter = None + storage_adapter = None + + if pricing_agent and pricing_config: + pricing_adapter = build_gym_pricing_adapter(pricing_config, pricing_agent, **kwargs) + + if charging_agent and charging_config: + charging_adapter = build_gym_charging_adapter(charging_config, charging_agent, **kwargs) + + if storage_agent and storage_config: + storage_adapter = build_gym_storage_adapter(storage_config, storage_agent, **kwargs) + + return AgentsController( + pricing=pricing_adapter, + charging=charging_adapter, + storage=storage_adapter + ) + + diff --git a/utilities/rl_agents/interfaces.py b/utilities/rl_agents/interfaces.py new file mode 100644 index 0000000..12706b0 --- /dev/null +++ b/utilities/rl_agents/interfaces.py @@ -0,0 +1,317 @@ +from typing import Protocol, Any, Dict, List, Optional, Union +from enum import Enum + + +class DecisionType(Enum): + """Types of decisions that can be made by agents""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + ROUTING = "routing" + VEHICLE_ASSIGNMENT = "vehicle_assignment" + PARKING_ALLOCATION = "parking_allocation" + GRID_MANAGEMENT = "grid_management" + DEMAND_FORECASTING = "demand_forecasting" + + +class AgentType(Enum): + """Types of agents that can make decisions""" + RL_SAC = "rl_sac" + RL_DQN = "rl_dqn" + RL_DDPG = "rl_ddpg" + RULE_BASED = "rule_based" + HEURISTIC = "heuristic" + OPTIMIZATION = "optimization" + ML_MODEL = "ml_model" + + +class BaseAgent(Protocol): + """ + Base interface for all decision-making agents. + + This protocol defines the standard interface that all agents must implement, + regardless of whether they are RL agents, rule-based agents, or other types. + """ + + @property + def agent_type(self) -> AgentType: ... + + @property + def decision_type(self) -> DecisionType: ... + + def reset(self) -> None: ... + + def update_state(self, context: Dict[str, Any]) -> None: ... + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: ... + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: ... + + def get_state(self) -> Any: ... + + def set_state(self, state: Any) -> None: ... + + +class PricingAgent(BaseAgent): + """ + Interface for pricing decision agents. + + Pricing agents make decisions about: + - Energy prices + - Parking fees + - Dynamic pricing strategies + - Price optimization + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.PRICING + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select pricing action based on current context. 
+ + Args: + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - pricing_mode: "Discrete", "Continuous", "ToU" + - charging_hub: Current charging hub state + - env: Simulation environment + - current_demand: Current energy demand + - grid_capacity: Available grid capacity + + Returns: + Dictionary containing: + - pricing_parameters: List of pricing parameters + - energy_price: Energy price per kWh + - parking_fee: Parking fee per hour + - confidence: Confidence in the decision (0-1) + """ + ... + + +class ChargingAgent(BaseAgent): + """ + Interface for charging decision agents. + + Charging agents make decisions about: + - Charging power allocation + - Charging schedules + - Priority assignment + - Load balancing + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.CHARGING + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on vehicles and context. + + Args: + vehicles: List of vehicles requiring charging decisions + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - available_power: Available charging power + - grid_constraints: Grid capacity constraints + + Returns: + Dictionary containing: + - charging_actions: List of charging actions per vehicle + - power_allocation: Power allocation strategy + - priority_order: Vehicle priority ordering + - confidence: Confidence in the decision (0-1) + """ + ... + + +class StorageAgent(BaseAgent): + """ + Interface for storage decision agents. + + Storage agents make decisions about: + - Energy storage charging/discharging + - Storage scheduling + - Peak shaving strategies + - Grid support operations + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.STORAGE + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action based on current context. + + Args: + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - storage_soc: Current state of charge + - grid_demand: Current grid demand + - pv_generation: Current PV generation + + Returns: + Dictionary containing: + - storage_action: Charging/discharging action + - power_level: Power level for storage operation + - strategy: Storage strategy (peak_shaving, arbitrage, etc.) + - confidence: Confidence in the decision (0-1) + """ + ... + + +class RoutingAgent(BaseAgent): + """ + Interface for routing decision agents. + + Routing agents make decisions about: + - Vehicle routing to charging stations + - Parking space allocation + - Queue management + - Resource assignment + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.ROUTING + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select routing action based on vehicles and context. 
+ + Args: + vehicles: List of vehicles requiring routing decisions + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - available_spaces: Available parking/charging spaces + - queue_status: Current queue status + + Returns: + Dictionary containing: + - routing_assignments: Vehicle to space assignments + - queue_order: Queue ordering + - wait_times: Estimated wait times + - confidence: Confidence in the decision (0-1) + """ + ... + + +class VehicleAssignmentAgent(BaseAgent): + """ + Interface for vehicle assignment decision agents. + + Vehicle assignment agents make decisions about: + - Which charging station to assign vehicles to + - Charging connector allocation + - Priority-based assignments + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.VEHICLE_ASSIGNMENT + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select vehicle assignment action. + + Args: + vehicles: List of vehicles to assign + context: Dictionary containing assignment context + + Returns: + Dictionary containing assignment decisions + """ + ... + + +class ParkingAllocationAgent(BaseAgent): + """ + Interface for parking allocation decision agents. + + Parking allocation agents make decisions about: + - Parking space allocation + - Parking duration optimization + - Space utilization + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.PARKING_ALLOCATION + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select parking allocation action. + + Args: + vehicles: List of vehicles requiring parking + context: Dictionary containing parking context + + Returns: + Dictionary containing parking allocation decisions + """ + ... + + +class GridManagementAgent(BaseAgent): + """ + Interface for grid management decision agents. + + Grid management agents make decisions about: + - Grid capacity management + - Load balancing + - Grid stability + - Peak demand management + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.GRID_MANAGEMENT + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select grid management action. + + Args: + context: Dictionary containing grid management context + + Returns: + Dictionary containing grid management decisions + """ + ... + + +class DemandForecastingAgent(BaseAgent): + """ + Interface for demand forecasting agents. + + Demand forecasting agents make decisions about: + - Energy demand prediction + - Load forecasting + - Demand patterns analysis + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.DEMAND_FORECASTING + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select demand forecasting action. + + Args: + context: Dictionary containing forecasting context + + Returns: + Dictionary containing demand forecasts + """ + ... 
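To make the intended use of these Protocol interfaces concrete, here is a minimal sketch (not part of the patch itself) of a custom agent that satisfies the PricingAgent interface defined above. The class name FlatRatePricingAgent, its default prices, and the confidence value are illustrative assumptions; the method set simply mirrors the RuleBasedPricingAgent added later in this diff.

from typing import Any, Dict, Optional

from utilities.rl_agents.interfaces import AgentType, DecisionType, PricingAgent


class FlatRatePricingAgent(PricingAgent):
    """Hypothetical example: always quotes a fixed energy price and parking fee."""

    def __init__(self, energy_price: float = 0.15, parking_fee: float = 2.0):
        self.energy_price = energy_price
        self.parking_fee = parking_fee
        self.state = None

    @property
    def agent_type(self) -> AgentType:
        return AgentType.RULE_BASED

    @property
    def decision_type(self) -> DecisionType:
        return DecisionType.PRICING

    def reset(self) -> None:
        self.state = None

    def update_state(self, context: Dict[str, Any]) -> None:
        self.state = context

    def get_state(self) -> Any:
        return self.state

    def set_state(self, state: Any) -> None:
        self.state = state

    def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]:
        # Return the same dictionary shape documented in PricingAgent.select_action.
        return {
            "pricing_parameters": [self.energy_price, self.parking_fee],
            "energy_price": self.energy_price,
            "parking_fee": self.parking_fee,
            "confidence": 1.0,
        }

    def learn(self, transition: Optional[Dict[str, Any]] = None) -> None:
        # Rule-based agents do not learn from transitions.
        pass

This is the same pattern the AlgorithmRoutingAgent, AlgorithmStorageAgent, and the rule-based agents introduced in this patch follow: implement the protocol methods, and the rest of the codebase interacts with the agent only through select_action and the agent_type/decision_type properties.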
+ + diff --git a/utilities/rl_agents/rule_based_agents.py b/utilities/rl_agents/rule_based_agents.py new file mode 100644 index 0000000..7ff1d6c --- /dev/null +++ b/utilities/rl_agents/rule_based_agents.py @@ -0,0 +1,479 @@ +from typing import Any, Dict, List, Optional +import numpy as np +from datetime import datetime + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent, + ParkingAllocationAgent, + GridManagementAgent, + DemandForecastingAgent +) + + +class RuleBasedPricingAgent(PricingAgent): + """ + Rule-based pricing agent that implements simple pricing strategies. + + This agent demonstrates how rule-based agents can be used alongside RL agents. + It implements common pricing strategies like time-of-use, demand-based, and + cost-plus pricing. + """ + + def __init__(self, strategy: str = "time_of_use"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.PRICING + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + """Reset the agent state.""" + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + """Update agent state based on context.""" + self.state = context + + def get_state(self) -> Any: + """Get current agent state.""" + return self.state + + def set_state(self, state: Any) -> None: + """Set agent state.""" + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select pricing action based on rule-based strategy. + + Args: + context: Dictionary containing pricing context + + Returns: + Dictionary containing pricing decision + """ + if self.strategy == "time_of_use": + return self._time_of_use_pricing(context) + elif self.strategy == "demand_based": + return self._demand_based_pricing(context) + elif self.strategy == "cost_plus": + return self._cost_plus_pricing(context) + else: + return self._default_pricing(context) + + def _time_of_use_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Time-of-use pricing strategy.""" + env = context.get("env") + current_hour = int((env.now % 1440) / 60) if env else 12 + + # Peak hours: 8-10 AM and 6-8 PM + if current_hour in [8, 9, 18, 19]: + energy_price = 0.25 # High price during peak + elif current_hour in [10, 11, 12, 13, 14, 15, 16, 17]: + energy_price = 0.15 # Medium price during day + else: + energy_price = 0.10 # Low price during off-peak + + parking_fee = 2.0 # Fixed parking fee + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.9, + "strategy": "time_of_use", + "reasoning": f"Peak hour pricing applied for hour {current_hour}" + } + + def _demand_based_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Demand-based pricing strategy.""" + charging_hub = context.get("charging_hub") + current_demand = charging_hub.grid.current_load if charging_hub else 100 + max_capacity = charging_hub.grid.capacity if charging_hub else 500 + + # Calculate demand ratio + demand_ratio = current_demand / max_capacity if max_capacity > 0 else 0.2 + + # Base price with demand multiplier + base_price = 0.15 + if demand_ratio > 0.8: + energy_price = base_price * 1.5 # High demand + elif demand_ratio > 0.6: + energy_price = base_price * 
1.2 # Medium demand + else: + energy_price = base_price # Low demand + + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.85, + "strategy": "demand_based", + "reasoning": f"Demand ratio {demand_ratio:.2f} applied" + } + + def _cost_plus_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Cost-plus pricing strategy.""" + # Assume base electricity cost + base_cost = 0.12 + markup = 0.25 # 25% markup + + energy_price = base_cost * (1 + markup) + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.95, + "strategy": "cost_plus", + "reasoning": f"Cost-plus pricing with {markup*100}% markup" + } + + def _default_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Default pricing strategy.""" + energy_price = 0.15 + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.8, + "strategy": "default", + "reasoning": "Default pricing applied" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass + + +class RuleBasedChargingAgent(ChargingAgent): + """ + Rule-based charging agent that implements simple charging strategies. + + This agent implements strategies like first-come-first-served, priority-based, + and load-balancing charging. + """ + + def __init__(self, strategy: str = "first_come_first_served"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.CHARGING + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on rule-based strategy. 
+ + Args: + vehicles: List of vehicles requiring charging + context: Dictionary containing charging context + + Returns: + Dictionary containing charging decision + """ + if self.strategy == "first_come_first_served": + return self._first_come_first_served(vehicles, context) + elif self.strategy == "priority_based": + return self._priority_based(vehicles, context) + elif self.strategy == "load_balancing": + return self._load_balancing(vehicles, context) + else: + return self._default_charging(vehicles, context) + + def _first_come_first_served(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """First-come-first-served charging strategy.""" + charging_actions = [] + priority_order = [] + + # Sort vehicles by arrival time + sorted_vehicles = sorted(vehicles, key=lambda v: v.arrival_period) + + for i, vehicle in enumerate(sorted_vehicles): + # Assign equal power to all vehicles + charging_power = 22.0 # Default charging power + charging_actions.append(charging_power) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "equal", + "priority_order": priority_order, + "confidence": 0.9, + "strategy": "first_come_first_served", + "reasoning": f"FCFS strategy applied to {len(vehicles)} vehicles" + } + + def _priority_based(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Priority-based charging strategy.""" + charging_actions = [] + priority_order = [] + + # Sort vehicles by priority (energy deficit, departure time, etc.) + def priority_key(vehicle): + energy_deficit = vehicle.remaining_energy_deficit + time_until_departure = vehicle.departure_period - context.get("env", {}).now + return (energy_deficit, -time_until_departure) # Higher deficit and earlier departure = higher priority + + sorted_vehicles = sorted(vehicles, key=priority_key, reverse=True) + + for i, vehicle in enumerate(sorted_vehicles): + # Higher priority vehicles get more power + if i < len(vehicles) // 3: + charging_power = 50.0 # High priority + elif i < 2 * len(vehicles) // 3: + charging_power = 22.0 # Medium priority + else: + charging_power = 11.0 # Low priority + + charging_actions.append(charging_power) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "priority_based", + "priority_order": priority_order, + "confidence": 0.85, + "strategy": "priority_based", + "reasoning": f"Priority-based strategy applied to {len(vehicles)} vehicles" + } + + def _load_balancing(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Load balancing charging strategy.""" + charging_actions = [] + priority_order = [] + + # Calculate total available power + charging_hub = context.get("charging_hub") + available_power = charging_hub.grid.capacity if charging_hub else 500 + + # Distribute power evenly among vehicles + power_per_vehicle = available_power / len(vehicles) if vehicles else 0 + + for i, vehicle in enumerate(vehicles): + charging_actions.append(power_per_vehicle) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "load_balanced", + "priority_order": priority_order, + "confidence": 0.8, + "strategy": "load_balancing", + "reasoning": f"Load balancing with {power_per_vehicle:.1f} kW per vehicle" + } + + def _default_charging(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Default charging strategy.""" + charging_actions = [22.0] * len(vehicles) # Default power for all vehicles + 
priority_order = list(range(len(vehicles))) + + return { + "charging_actions": charging_actions, + "power_allocation": "default", + "priority_order": priority_order, + "confidence": 0.7, + "strategy": "default", + "reasoning": f"Default charging strategy applied to {len(vehicles)} vehicles" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass + + +class RuleBasedStorageAgent(StorageAgent): + """ + Rule-based storage agent that implements simple storage strategies. + + This agent implements strategies like peak shaving, arbitrage, and + grid support operations. + """ + + def __init__(self, strategy: str = "peak_shaving"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.STORAGE + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action based on rule-based strategy. + + Args: + context: Dictionary containing storage context + + Returns: + Dictionary containing storage decision + """ + if self.strategy == "peak_shaving": + return self._peak_shaving(context) + elif self.strategy == "arbitrage": + return self._arbitrage(context) + elif self.strategy == "grid_support": + return self._grid_support(context) + else: + return self._default_storage(context) + + def _peak_shaving(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Peak shaving storage strategy.""" + charging_hub = context.get("charging_hub") + current_load = charging_hub.grid.current_load if charging_hub else 100 + max_capacity = charging_hub.grid.capacity if charging_hub else 500 + storage_soc = charging_hub.electric_storage.soc if charging_hub else 0.5 + + # Discharge if load is high and storage has capacity + if current_load > max_capacity * 0.8 and storage_soc > 0.2: + storage_action = -50.0 # Discharge + strategy = "peak_shaving_discharge" + elif current_load < max_capacity * 0.4 and storage_soc < 0.8: + storage_action = 30.0 # Charge + strategy = "peak_shaving_charge" + else: + storage_action = 0.0 # No action + strategy = "peak_shaving_idle" + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.85, + "reasoning": f"Peak shaving: load={current_load:.1f}, soc={storage_soc:.2f}" + } + + def _arbitrage(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Arbitrage storage strategy.""" + env = context.get("env") + current_hour = int((env.now % 1440) / 60) if env else 12 + storage_soc = context.get("storage_soc", 0.5) + + # Charge during low-price hours (night), discharge during high-price hours (day) + if 22 <= current_hour or current_hour <= 6: # Night hours + if storage_soc < 0.9: + storage_action = 40.0 # Charge + strategy = "arbitrage_charge" + else: + storage_action = 0.0 # Full + strategy = "arbitrage_full" + else: # Day hours + if storage_soc > 0.1: + storage_action = -40.0 # Discharge + strategy = "arbitrage_discharge" + else: + storage_action = 0.0 # Empty + strategy = "arbitrage_empty" + + return { + "storage_action": storage_action, 
+ "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.8, + "reasoning": f"Arbitrage: hour={current_hour}, soc={storage_soc:.2f}" + } + + def _grid_support(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Grid support storage strategy.""" + charging_hub = context.get("charging_hub") + grid_frequency = getattr(charging_hub.grid, 'frequency', 50.0) if charging_hub else 50.0 + storage_soc = charging_hub.electric_storage.soc if charging_hub else 0.5 + + # Support grid frequency + if grid_frequency < 49.8: # Low frequency + if storage_soc > 0.1: + storage_action = -30.0 # Discharge to support + strategy = "grid_support_discharge" + else: + storage_action = 0.0 + strategy = "grid_support_empty" + elif grid_frequency > 50.2: # High frequency + if storage_soc < 0.9: + storage_action = 30.0 # Charge to absorb + strategy = "grid_support_charge" + else: + storage_action = 0.0 + strategy = "grid_support_full" + else: # Normal frequency + storage_action = 0.0 + strategy = "grid_support_idle" + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.9, + "reasoning": f"Grid support: frequency={grid_frequency:.1f}, soc={storage_soc:.2f}" + } + + def _default_storage(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Default storage strategy.""" + storage_action = 0.0 + + return { + "storage_action": storage_action, + "power_level": 0.0, + "strategy": "default", + "confidence": 0.7, + "reasoning": "Default storage strategy applied" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass diff --git a/utilities/rl_environments/SC_env.py b/utilities/rl_environments/SC_env.py index 5f0e36a..695b458 100644 --- a/utilities/rl_environments/SC_env.py +++ b/utilities/rl_environments/SC_env.py @@ -10,7 +10,7 @@ class ChargingHubInvestmentEnv(gym.Env): reward_range = (-float("inf"), float("inf")) spec = None - def __init__(self, config): + def __init__(self, config, charging_hub=None, env=None): # Set these in ALL subclasses self.action_space = spaces.Box( low=0, @@ -26,8 +26,8 @@ def __init__(self, config): shape=(config.number_chargers * 3 + 2 + 5,), dtype=np.float64, ) - self.charging_hub = None - self.env = None + self.charging_hub = charging_hub + self.env = env self.id = 1 self.episode = 0 # vehicles_to_decide = [vehicle for vehicle in self.fleet.vehicles if vehicle.mode in ['idle','parking','circling']][0:10] @@ -129,7 +129,7 @@ def get_state(self, charging_hub=None, env=None): state = np.append(state, charger_state) return state - def step(self, action, charging_hub=None, env=None): + def step(self, action): # Execute one time step within the environment # the first action is charging/discharging of the battery # storage_power = action[0] @@ -144,9 +144,9 @@ def step(self, action, charging_hub=None, env=None): # if len(charging_vehicles) > 0: # charging_vehicles[0].charging_power = action[i+1] self.current_step += 1 - reward = self._take_action(action, charging_hub, env) + reward = self._take_action(action) done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + obs = self._next_observation() return obs, reward, done, {} def receive_action(self): @@ -165,28 +165,130 @@ def reset(self): def render(self, mode="human", close=False): print(self.reward) - def _take_action(self, action, charging_hub, env): + def _take_action(self, action): reward = 0 penalty_ratio = 0.001 - reward -= 
charging_hub.reward["missed"] - reward -= charging_hub.reward["feasibility"] * penalty_ratio - # reward -= charging_hub.reward['feasibility_storage'] * penalty_ratio + reward -= self.charging_hub.reward["missed"] + reward -= self.charging_hub.reward["feasibility"] * penalty_ratio + # reward -= self.charging_hub.reward['feasibility_storage'] * penalty_ratio - self.total_reward["missed"] -= charging_hub.reward["missed"] + self.total_reward["missed"] -= self.charging_hub.reward["missed"] # print(f'charging:{self.total_reward["missed"]}') self.total_reward["feasibility"] -= ( - charging_hub.reward["feasibility"] * penalty_ratio + self.charging_hub.reward["feasibility"] * penalty_ratio ) - # self.total_reward['feasibility_storage'] -= charging_hub.reward['feasibility_storage'] * penalty_ratio - self.total_reward["energy"] -= charging_hub.grid.energy_rewards * 0 + # self.total_reward['feasibility_storage'] -= self.charging_hub.reward['feasibility_storage'] * penalty_ratio + self.total_reward["energy"] -= self.charging_hub.grid.energy_rewards * 0 - if not charging_hub.dynamic_pricing: - charging_hub.reward["missed"] = 0 - charging_hub.reward["feasibility_storage"] = 0 - charging_hub.reward["feasibility"] = 0 + if not self.charging_hub.dynamic_pricing: + self.charging_hub.reward["missed"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 return reward / 100 - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + def _next_observation(self): + return self.get_state(self.charging_hub, self.env) + + def penalty_action(self, action): + """ + Calculate feasibility penalties for charging actions. + This method was moved from SAC.py to keep simulation logic in the environment. + """ + if not self.charging_hub: + return + + vehicle_state = self.state[24 + 5 + 5 :] if hasattr(self, 'state') else [] + ### check charging action + total_usage = np.array([]) + i = 0 + for charger in self.charging_hub.chargers: + associated_power = np.array([]) + for j in range(charger.number_of_connectors): + maximum_power = charger.power + if vehicle_state[i * 3] <= 0: + self.charging_hub.reward["feasibility"] += action[i + 1] + else: + associated_power = np.append(associated_power, action[i + 1]) + total_usage = np.append(total_usage, action[i + 1]) + i += 1 + surplus_per_charger = max(associated_power.sum() - maximum_power, 0) + self.charging_hub.reward["feasibility"] += surplus_per_charger + total_surplus = max( + total_usage.sum() - self.charging_hub.operator.free_grid_capa_actual[0], 0 + ) + self.charging_hub.reward["feasibility"] += total_surplus + + def checked_action(self, action): + """ + Check and adjust charging actions for feasibility. + This method was moved from SAC.py to keep simulation logic in the environment. 
+ """ + if not self.charging_hub: + return action + + vehicle_state = self.state[24 + 5 + 5 :] if hasattr(self, 'state') else [] + ### check charging action + i = 0 + for charger in self.charging_hub.chargers: + lower_bound = i + 1 + for j in range(charger.number_of_connectors): + maximum_power = charger.power + if vehicle_state[i * 3] <= 0: + action[i + 1] = 0 + i += 1 + upper_bound = i + 1 + + while action[lower_bound:upper_bound].sum() > maximum_power: + number_active_chargers = len( + [f for f in action[lower_bound:upper_bound] if f > 0] + ) + surplus_per_charger = ( + max(action[lower_bound:upper_bound].sum() - maximum_power, 0) + / number_active_chargers + ) + action[lower_bound:upper_bound] -= surplus_per_charger + for c in range(len(action[lower_bound:upper_bound])): + action[lower_bound:upper_bound][c] = max( + action[lower_bound:upper_bound][c], 0 + ) + + storage_object = self.charging_hub.electric_storage + storage_object.SoC = min( + storage_object.SoC, storage_object.max_energy_stored_kWh + ) + storage_object.SoC = max(storage_object.SoC, 0) + if action[0] >= 0: + if ( + storage_object.SoC + action[0] / 60 * self.charging_hub.planning_interval + > storage_object.max_energy_stored_kWh + ): + action[0] = ( + storage_object.max_energy_stored_kWh - storage_object.SoC + ) / (60 * self.charging_hub.planning_interval) + action[0] = min(action[0], self.charging_hub.operator.free_grid_capa_actual[0]) + + # discharge rate cannot exceed SoC, and hub demand (i.e., no infeed) + if action[0] < 0: + if storage_object.SoC <= 0: + action[0] = 0 + elif ( + storage_object.SoC + (action[0] / 60 * self.charging_hub.planning_interval) + < 0 + ): + action[0] = -max( + (storage_object.SoC) / (60 * self.charging_hub.planning_interval), 0 + ) + + while action.sum() - self.charging_hub.operator.free_grid_capa_actual[0] > 0: + number_active_chargers = len([a for a in action if a > 0]) + surplus_per_charger = ( + max(action.sum() - self.charging_hub.operator.free_grid_capa_actual[0], 0) + / number_active_chargers + ) + for i in range(1, len(action)): + action[i] = max(action[i] - surplus_per_charger, 0) + # if action[0]>0: + # action[0] = max(action[0] - surplus_per_charger, 0) + return action diff --git a/utilities/rl_environments/SC_storage_env.py b/utilities/rl_environments/SC_storage_env.py index d007ae6..be34420 100644 --- a/utilities/rl_environments/SC_storage_env.py +++ b/utilities/rl_environments/SC_storage_env.py @@ -10,7 +10,7 @@ class StorageEnv(gym.Env): reward_range = (-float("inf"), float("inf")) spec = None - def __init__(self, config): + def __init__(self, config, charging_hub=None, env=None): # Set these in ALL subclasses self.action_space = spaces.Box(low=250, high=800, shape=(1,), dtype=np.float64) self.observation_space = spaces.Box( @@ -19,8 +19,8 @@ def __init__(self, config): shape=(config.number_chargers * 3 + 24 + 5 + 5,), dtype=np.float64, ) - self.charging_hub = None - self.env = None + self.charging_hub = charging_hub + self.env = env self.id = 1 self.episode = 0 # vehicles_to_decide = [vehicle for vehicle in self.fleet.vehicles if vehicle.mode in ['idle','parking','circling']][0:10] @@ -104,7 +104,7 @@ def get_state(self, charging_hub=None, env=None): # print(len(state)) return state - def step(self, action, charging_hub=None, env=None): + def step(self, action): # Execute one time step within the environment # the first action is charging/discharging of the battery # storage_power = action[0] @@ -119,9 +119,9 @@ def step(self, action, charging_hub=None, env=None): # if 
len(charging_vehicles) > 0: # charging_vehicles[0].charging_power = action[i+1] self.current_step += 1 - reward = self._take_action(action, charging_hub, env) + reward = self._take_action(action) done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + obs = self._next_observation() return obs, reward, done, {} def receive_action(self): @@ -140,21 +140,21 @@ def reset(self): def render(self, mode="human", close=False): print(self.reward) - def _take_action(self, action, charging_hub, env): + def _take_action(self, action): # # state = state.reshape((1, self._state_size)) # lg.info(f'old_state={fleet.old_state}, old_action={fleet.old_action}') # lg.info(f'new_action={action}, new_state={state}, {fleet.charging_count}') reward = 0 - reward -= charging_hub.reward["missed"] + reward -= self.charging_hub.reward["missed"] - charging_hub.reward["missed"] = 0 + self.charging_hub.reward["missed"] = 0 ### TODO add the energy rewards to reward["costs"] # charging_hub.grid.energy_rewards = 0 - charging_hub.reward["feasibility"] = 0 - charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 return reward - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + def _next_observation(self): + return self.get_state(self.charging_hub, self.env) diff --git a/utilities/rl_environments/evch_gym_env.py b/utilities/rl_environments/evch_gym_env.py new file mode 100644 index 0000000..9840ea4 --- /dev/null +++ b/utilities/rl_environments/evch_gym_env.py @@ -0,0 +1,670 @@ +import gym +from gym import spaces +import numpy as np +from typing import Dict, Any, Optional, Tuple, Union +from dataclasses import dataclass +from enum import Enum + +from utilities.rl_environments.rl_pricing_env import PricingEnv +from utilities.rl_environments.SC_env import ChargingHubInvestmentEnv +from utilities.rl_environments.SC_storage_env import StorageEnv + + +class AgentType(Enum): + """Enumeration of available agent types.""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + + +@dataclass +class EVCHConfig: + """Configuration for the EVCH gym environment.""" + agent_type: AgentType + number_chargers: int + number_power_options: int + maximum_power: float + maximum_grid_usage: float + evaluation: bool = False + pricing_mode: str = "Continuous" + dynamic_fix_term_pricing: bool = False + capacity_pricing: bool = False + dynamic_parking_fee: bool = False + limiting_grid_capa: bool = False + dynamic_storage_scheduling: bool = False + + + +class EVCHGymEnv(gym.Env): + """ + Unified gym environment for EV Charging Hub operations. + + This environment completely decouples RL agents from the simulation, + providing a standard gym interface that can be used with any gym-compatible + RL library (Stable Baselines3, RLlib, etc.). + + The environment can be configured for different agent types: + - PRICING: Dynamic pricing decisions + - CHARGING: Charging optimization decisions + - STORAGE: Energy storage management decisions + """ + + metadata = {"render_modes": ["human"], "render_fps": 4} + + def __init__(self, config: EVCHConfig, charging_hub: Optional[Any] = None, sim_env: Optional[Any] = None): + """ + Initialize the EVCH gym environment. 
+ + Args: + config: Configuration object specifying agent type and parameters + charging_hub: Reference to the charging hub (will be set later if None) + sim_env: Reference to the simulation environment (will be set later if None) + """ + super().__init__() + + self.config = config + self.agent_type = config.agent_type + self.charging_hub = charging_hub + self.sim_env = sim_env + + # Initialize the appropriate underlying environment + self._init_underlying_env() + + # Set observation and action spaces + self._set_spaces() + + # State tracking + self.current_state = None + self.current_action = None + self.current_reward = 0.0 + self.done = False + self.info = {} + + # Episode tracking + self.episode_step = 0 + self.max_episode_steps = 1000 # Configurable + + def _init_underlying_env(self): + """Initialize the underlying environment based on agent type.""" + if self.agent_type == AgentType.PRICING: + self.underlying_env = PricingEnv(self.config, DQN=False, charging_hub=None, env=None) + elif self.agent_type == AgentType.CHARGING: + self.underlying_env = ChargingHubInvestmentEnv(self.config, charging_hub=None, env=None) + elif self.agent_type == AgentType.STORAGE: + self.underlying_env = StorageEnv(self.config, charging_hub=None, env=None) + else: + raise ValueError(f"Unknown agent type: {self.agent_type}") + + def _set_spaces(self): + """Set observation and action spaces based on the underlying environment.""" + # Use the underlying environment's spaces + self.observation_space = self.underlying_env.observation_space + self.action_space = self.underlying_env.action_space + + def set_simulation_context(self, charging_hub: Any, sim_env: Any): + """ + Set the simulation context (charging hub and environment). + + This method allows the gym environment to be connected to the actual + simulation without tight coupling. + + Args: + charging_hub: The charging hub object + sim_env: The simulation environment + """ + self.charging_hub = charging_hub + self.sim_env = sim_env + self.underlying_env.charging_hub = charging_hub + self.underlying_env.env = sim_env + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]: + """ + Reset the environment to initial state. + + Args: + seed: Random seed for reproducibility + options: Additional options for reset + + Returns: + Tuple of (observation, info) + """ + super().reset(seed=seed) + + # Reset underlying environment + if hasattr(self.underlying_env, 'reset'): + self.current_state = self.underlying_env.reset() + else: + # Fallback: get initial state + self.current_state = self._get_state() + + # Reset episode tracking + self.episode_step = 0 + self.done = False + self.current_reward = 0.0 + self.info = {} + + return self.current_state, self.info + + def step(self, action: Union[np.ndarray, int]) -> Tuple[np.ndarray, float, bool, bool, Dict]: + """ + Take a step in the environment. + + Args: + action: The action to take (numpy array or int) + + Returns: + Tuple of (observation, reward, terminated, truncated, info) + """ + if self.charging_hub is None or self.sim_env is None: + raise RuntimeError("Simulation context not set. 
Call set_simulation_context() first.") + + # Store current action + self.current_action = action + + # Apply action based on agent type + reward = self._apply_action(action) + + # Get new state with error handling + try: + next_state = self._get_state() + except Exception as e: + # If we can't get the state (e.g., simulation ended), return zeros and mark as terminated + print(f"Could not get state: {e}. Marking episode as terminated.") + next_state = np.zeros(self.observation_space.shape[0]) + terminated = True + truncated = False + self.done = True + info = { + "agent_type": self.agent_type.value, + "episode_step": self.episode_step, + "action": action, + "reward": reward, + "error": str(e) + } + return next_state, reward, terminated, truncated, info + + # Update state + self.current_state = next_state + self.current_reward = reward + self.episode_step += 1 + + # Check if episode is done + terminated = self._is_episode_done() + truncated = self.episode_step >= self.max_episode_steps + self.done = terminated or truncated + + # Prepare info + info = { + "agent_type": self.agent_type.value, + "episode_step": self.episode_step, + "action": action, + "reward": reward + } + + return next_state, reward, terminated, truncated, info + + def _get_state(self) -> np.ndarray: + """Get the current state from the underlying environment.""" + if hasattr(self.underlying_env, 'get_state'): + return self.underlying_env.get_state(self.charging_hub, self.sim_env) + else: + # Fallback: return zeros if no get_state method + return np.zeros(self.observation_space.shape[0]) + + def _apply_action(self, action: Union[np.ndarray, int]) -> float: + """ + Apply the action and return the reward. + + Args: + action: The action to apply + + Returns: + The reward received + """ + if self.agent_type == AgentType.PRICING: + return self._apply_pricing_action(action) + elif self.agent_type == AgentType.CHARGING: + return self._apply_charging_action(action) + elif self.agent_type == AgentType.STORAGE: + return self._apply_storage_action(action) + else: + raise ValueError(f"Unknown agent type: {self.agent_type}") + + def _apply_pricing_action(self, action: Union[np.ndarray, int]) -> float: + """Apply pricing action and return reward.""" + # Store action in the charging hub's pricing agent if it exists + if hasattr(self.charging_hub, 'pricing_agent') and self.charging_hub.pricing_agent is not None: + self.charging_hub.pricing_agent.action = action + + # Apply the pricing action to the pricing parameters for continuous pricing + if hasattr(self.charging_hub, 'operator'): + try: + # Get the pricing mode from configuration + from resources.configuration.configuration import Configuration + config = Configuration.instance() + pricing_mode = getattr(config, 'pricing_mode', 'Continuous') + + if pricing_mode == "Continuous": + # For continuous pricing, apply the action to pricing_parameters + if hasattr(self.charging_hub.operator, 'pricing_parameters'): + # Apply action to pricing parameters: + # pricing_parameters[0] = fixed term (p_0) - energy price component + # pricing_parameters[1] = rate-based term (alpha) - capacity price component + if len(action) >= 2: + self.charging_hub.operator.pricing_parameters[0] = action[0] # Fixed term (p_0) + self.charging_hub.operator.pricing_parameters[1] = action[1] # Rate-based term (alpha) + elif len(action) == 1: + self.charging_hub.operator.pricing_parameters[0] = action[0] # Fixed term (p_0) + + # For continuous pricing, we don't use price_pairs directly + # Instead, vehicles calculate their 
own price using the price_function: + # price = p_0 + alpha * power^degree + print(f"Applied continuous pricing action {action} to pricing_parameters: {self.charging_hub.operator.pricing_parameters}") + + elif pricing_mode == "Discrete": + # For discrete pricing, apply to price_pairs + if hasattr(self.charging_hub.operator, 'price_pairs'): + from utilities.rl_environments.rl_pricing_env import convert_to_vector + if isinstance(action, (int, np.integer)): + vector_prices = convert_to_vector(action) + for i, price in enumerate(vector_prices): + if i < len(self.charging_hub.operator.price_pairs): + self.charging_hub.operator.price_pairs[i, 1] = price + else: + # Action is already a vector + for i, price in enumerate(action): + if i < len(self.charging_hub.operator.price_pairs): + self.charging_hub.operator.price_pairs[i, 1] = price + print(f"Applied discrete pricing action {action} to price_pairs: {self.charging_hub.operator.price_pairs}") + + except Exception as e: + print(f"Failed to apply pricing action: {e}") + + # Advance the simulation by one planning interval (typically 60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _apply_charging_action(self, action: np.ndarray) -> float: + """Apply charging action and return reward.""" + # Store action in the charging hub's charging agent if it exists + if hasattr(self.charging_hub, 'charging_agent') and self.charging_hub.charging_agent is not None: + self.charging_hub.charging_agent.action = action + + # Advance the simulation by one planning interval (typically 60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _apply_storage_action(self, action: np.ndarray) -> float: + """Apply storage action and return reward.""" + # Store action in the charging hub's storage agent if it exists + if hasattr(self.charging_hub, 'storage_agent') and self.charging_hub.storage_agent is not None: + self.charging_hub.storage_agent.action = action + + # Advance the simulation by one planning interval (typically 
60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _is_episode_done(self) -> bool: + """Check if the episode is done.""" + if hasattr(self.underlying_env, 'done'): + return self.underlying_env.done + else: + # Fallback: episode is never done by default + return False + + def render(self, mode: str = "human"): + """Render the environment (placeholder for now).""" + if mode == "human": + print(f"EVCH Environment - Agent: {self.agent_type.value}, Step: {self.episode_step}") + return None + + def close(self): + """Close the environment.""" + if hasattr(self.underlying_env, 'close'): + self.underlying_env.close() + + def _calculate_comprehensive_reward(self) -> float: + """ + Calculate comprehensive reward by moving simulation-based reward logic to the RL environment. + + This method consolidates reward calculation that was previously scattered across + simulation components, making the RL environment more self-contained. + + Returns: + float: Calculated reward value + """ + if not self.charging_hub or not hasattr(self.charging_hub, 'operator'): + return 0.0 + + try: + reward = 0.0 + operator = self.charging_hub.operator + + # 1. GRID USAGE AND PEAK PENALTIES + current_grid_usage = max(self.charging_hub.grid.grid_usage) if self.charging_hub.grid.grid_usage else 0 + current_peak_threshold = operator.peak_threshold + + # Peak threshold violation penalty (from operator.reward_computing) + if current_grid_usage > current_peak_threshold: + peak_penalty = (current_grid_usage - current_peak_threshold) * operator.peak_cost + reward -= peak_penalty + # Update peak threshold (as done in simulation) + operator.peak_threshold = current_grid_usage + + # 2. 
OBJECTIVE FUNCTION-BASED REWARD (from simulation model) + # Calculate revenue and costs similar to simulation model + total_revenue = 0.0 + total_energy_costs = self.charging_hub.grid.energy_costs if hasattr(self.charging_hub.grid, 'energy_costs') else 0.0 + + # Calculate revenue from served vehicles + requests = [r for r in operator.requests if r.ev == 1] + for request in requests: + if request.is_assigned and request.energy_requested > 0: + # Revenue from energy charged + energy_charged = min(request.energy_charged, request.energy_requested) + total_revenue += energy_charged * request.charging_price + + # Revenue from parking fees + total_revenue += request.park_duration * request.parking_fee + + # Penalty for missed energy (from simulation model) + energy_missed = max(request.energy_requested - request.energy_charged, 0) + if energy_missed > 0: + from resources.configuration.configuration import Configuration + missed_penalty = energy_missed * request.charging_price * Configuration.instance().energy_missed_penalty + total_revenue -= missed_penalty + + # Calculate operational costs + operational_costs = total_energy_costs + + # Peak charges (if applicable) + if hasattr(self.charging_hub, 'prices') and 'peak' in self.charging_hub.prices: + peak_charge = max((current_grid_usage - operator.peak_threshold) * self.charging_hub.prices['peak'], 0) + operational_costs += peak_charge + + # 3. OBJECTIVE FUNCTION CALCULATION (from simulation model) + if hasattr(self.charging_hub, 'objective'): + if self.charging_hub.objective == "min_costs": + # Calculate missed energy penalty + total_energy_missed = sum( + max(r.energy_requested - r.energy_charged, 0) + for r in requests if r.energy_requested > 0 + ) + missed_penalty = total_energy_missed * getattr(self.charging_hub, 'penalty_for_missed_kWh', 1.0) + objective_value = missed_penalty + operational_costs + elif self.charging_hub.objective == "max_profits": + objective_value = total_revenue - operational_costs + else: + objective_value = total_revenue - operational_costs + else: + objective_value = total_revenue - operational_costs + + # 4. OBJECTIVE FUNCTION CHANGE REWARD (from operator.reward_computing) + if hasattr(operator, 'objective'): + objective_change = objective_value - operator.objective + reward -= objective_change # Negative because we want to minimize costs/maximize profits + operator.objective = objective_value + else: + operator.objective = objective_value + + # 5. SERVICE LEVEL REWARD (from simulation model) + served_requests = sum(1 for r in requests if r.energy_requested > 0 and r.energy_charged > 0) + total_requests = sum(1 for r in requests if r.energy_requested > 0) + if total_requests > 0: + service_level = served_requests / total_requests + service_reward = service_level * 0.5 # Reward for good service level + reward += service_reward + + # 6. 
PRICING-SPECIFIC REWARDS (existing logic) + if self.agent_type == AgentType.PRICING and hasattr(operator, 'pricing_parameters'): + pricing_params = operator.pricing_parameters + if len(pricing_params) >= 2: + base_price = pricing_params[0] + capacity_price = pricing_params[1] + + # Count rejected vehicles due to pricing + rejected_vehicles = sum(1 for r in requests + if r.ev == 1 and r.energy_requested == 0 and r.charging_price > 0) + + # Strong penalty for pricing that causes vehicle rejections + if rejected_vehicles > 0: + rejection_penalty = rejected_vehicles * 0.5 + reward -= rejection_penalty + + # Penalty for extreme pricing + if base_price > 1.2 or base_price < 0.3: + reward -= 0.3 + elif 0.4 <= base_price <= 1.0: + reward += 0.3 + + if capacity_price > 0.3 or capacity_price < 0.01: + reward -= 0.2 + elif 0.02 <= capacity_price <= 0.2: + reward += 0.2 + + # 7. GRID EFFICIENCY REWARD (existing logic) + if current_grid_usage <= current_peak_threshold: + efficiency_ratio = current_grid_usage / current_peak_threshold + if 0.7 <= efficiency_ratio <= 0.9: + reward += 0.3 + elif efficiency_ratio < 0.3: + reward -= 0.1 + elif efficiency_ratio > 0.95: + reward -= 0.2 + + # 8. VEHICLE SERVICE REWARD (existing logic) + active_vehicles = sum(1 for r in requests if r.mode in ["Connected", "Charging"]) + if active_vehicles > 0: + service_reward = min(active_vehicles * 0.3, 2.0) + reward += service_reward + + # 9. COMPLETION REWARD (existing logic) + completed_vehicles = sum(1 for r in requests if r.mode in ["Fully_charged", "Left"]) + completion_reward = completed_vehicles * 0.1 + reward += completion_reward + + # 10. TIME-BASED REWARD (existing logic) + time_factor = (self.sim_env.now % 1440) / 1440 + time_reward = 0.1 * time_factor + reward += time_reward + + return reward + + except Exception as e: + print(f"Comprehensive reward calculation failed: {e}") + return 0.0 + + def _calculate_simulation_based_reward(self) -> float: + """ + Calculate reward using the original simulation-based approach. + + This method maintains backward compatibility with the existing + simulation-based reward calculation logic. + + Returns: + float: Calculated reward value + """ + # This method contains the original reward calculation logic + # that was previously in the pricing action method + if hasattr(self.charging_hub, 'operator'): + try: + # Get current state metrics + current_grid_usage = max(self.charging_hub.grid.grid_usage) if self.charging_hub.grid.grid_usage else 0 + current_peak_threshold = self.charging_hub.operator.peak_threshold + + # Count active vehicles (charging or connected) + active_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.mode in ["Connected", "Charging"] and request.ev == 1) + + # Count completed vehicles (fully charged or left) + completed_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.mode in ["Fully_charged", "Left"] and request.ev == 1) + + # Count vehicles that rejected charging due to high prices + rejected_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.ev == 1 and request.energy_requested == 0 and request.charging_price > 0) + + # Calculate dynamic reward components + reward = 0.0 + + # 1. Grid usage penalty (negative reward for exceeding peak threshold) + if current_grid_usage > current_peak_threshold: + penalty = (current_grid_usage - current_peak_threshold) * 0.1 + reward -= penalty + + # 2. 
Dynamic service reward based on current pricing action + if hasattr(self.charging_hub.operator, 'pricing_parameters'): + pricing_params = self.charging_hub.operator.pricing_parameters + if len(pricing_params) >= 2: + # Reward for optimal pricing (not too high, not too low) + base_price = pricing_params[0] + capacity_price = pricing_params[1] + + # Strong penalty for pricing that causes vehicle rejections + if rejected_vehicles > 0: + rejection_penalty = rejected_vehicles * 0.5 # Strong penalty per rejected vehicle + reward -= rejection_penalty + + # Penalty for extreme pricing + if base_price > 1.2 or base_price < 0.3: + reward -= 0.3 + elif 0.4 <= base_price <= 1.0: + reward += 0.3 # Reward for reasonable pricing + + if capacity_price > 0.3 or capacity_price < 0.01: + reward -= 0.2 + elif 0.02 <= capacity_price <= 0.2: + reward += 0.2 # Reward for reasonable capacity pricing + + # 3. Grid efficiency reward (varies based on usage) + if current_grid_usage <= current_peak_threshold: + # Reward for efficient grid usage (closer to threshold = better) + efficiency_ratio = current_grid_usage / current_peak_threshold + if 0.7 <= efficiency_ratio <= 0.9: + reward += 0.3 # Sweet spot for efficiency + elif efficiency_ratio < 0.3: + reward -= 0.1 # Too low usage + elif efficiency_ratio > 0.95: + reward -= 0.2 # Too close to limit + + # 4. Vehicle service reward (varies based on demand) + if active_vehicles > 0: + # Reward for serving vehicles, but with diminishing returns + service_reward = min(active_vehicles * 0.3, 2.0) # Cap at 2.0 + reward += service_reward + + # 5. Completion reward (small incremental reward) + completion_reward = completed_vehicles * 0.1 # Smaller reward per completion + reward += completion_reward + + # 6. Time-based reward variation (encourage progress) + time_factor = (self.sim_env.now % 1440) / 1440 # Normalize to 0-1 over day + time_reward = 0.1 * time_factor # Small time-based reward + reward += time_reward + + return reward + + except Exception as e: + # If reward computation fails, return neutral reward + print(f"Simulation-based reward computation failed: {e}") + return 0.0 + else: + # Fallback: return 0 reward + return 0.0 + + +# Factory functions for easy environment creation +def make_pricing_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a pricing environment.""" + config = EVCHConfig(agent_type=AgentType.PRICING, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +def make_charging_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a charging environment.""" + config = EVCHConfig(agent_type=AgentType.CHARGING, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +def make_storage_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a storage environment.""" + config = EVCHConfig(agent_type=AgentType.STORAGE, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +# Register environments with gym (optional, for gym.make() support) +try: + from gym.envs.registration import register + + register( + id='EVCH-Pricing-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_pricing_env', + ) + + register( + id='EVCH-Charging-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_charging_env', + ) + + register( + id='EVCH-Storage-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_storage_env', + ) + +except ImportError: + # gym registration not available + pass diff --git a/utilities/rl_environments/rl_pricing_env.py b/utilities/rl_environments/rl_pricing_env.py index 
2660911..c7c6260 100644 --- a/utilities/rl_environments/rl_pricing_env.py +++ b/utilities/rl_environments/rl_pricing_env.py @@ -3,409 +3,627 @@ import numpy as np import logging import pandas as pd +from typing import Optional, Dict, Any, Tuple, Union +from dataclasses import dataclass from resources.configuration.configuration import Configuration -k = 5 + +@dataclass +class PricingState: + """Data class for pricing environment state information.""" + storage_soc: float + pv_generation: float + electricity_price: float + peak_usage: float + avg_energy_demand: float + avg_power_demand: float + free_grid_capacity: float + + +@dataclass +class PricingConfig: + """Data class for pricing environment configuration.""" + number_power_options: int + maximum_power: float + evaluation: bool + pricing_mode: str + dynamic_fix_term_pricing: bool + capacity_pricing: bool + dynamic_parking_fee: bool + limiting_grid_capa: bool + dynamic_storage_scheduling: bool class PricingEnv(gym.Env): + """ + Gym environment for dynamic pricing in EV charging hub operations. + + This environment provides a standardized interface for RL agents to learn + optimal pricing strategies for EV charging services. + """ + metadata = {"render.modes": ["human"]} reward_range = (-float("inf"), float("inf")) spec = None - - def __init__(self, config, DQN=False): - # Set these in ALL subclasses - self.final_action_DQN = None - if DQN == True: - # self.action_space = spaces.Discrete(2*2*2*4*4*5*2*3*3) - self.action_space = spaces.Discrete(k**config.number_power_options) - else: - number_of_actions = config.number_power_options - 1 - if ( - Configuration.instance().dynamic_fix_term_pricing - and Configuration.instance().capacity_pricing - ): - number_of_actions = config.number_power_options - if Configuration.instance().dynamic_parking_fee: - number_of_actions = config.number_power_options - if Configuration.instance().limiting_grid_capa: - number_of_actions = config.number_power_options - if Configuration.instance().dynamic_storage_scheduling: - number_of_actions = config.number_power_options - self.action_space = spaces.Box( - low=0, - high=config.maximum_power, - shape=(number_of_actions,), - dtype=np.float64, - ) - if Configuration.instance().pricing_mode == "Discrete": - # action_size = config.number_power_options - action_size = 2 - self.action_space = spaces.Box( - low=0, - high=config.maximum_power, - shape=(action_size,), - dtype=np.float64, - ) - self.action_space.low[0] = 0.3 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0.5 - self.action_space.high[1] = 1.5 - # TODO: hard coded - if config.number_power_options >= 3: - self.action_space.low[2], self.action_space.high[2] = 300, 800 - if config.number_power_options >= 4: - self.action_space.low[3], self.action_space.high[3] = -200, 200 - - if Configuration.instance().pricing_mode == "Continuous": - self.action_space.low[0] = 0 - self.action_space.high[0] = 1.5 - if Configuration.instance().limiting_grid_capa: - self.action_space.low[1] = 300 - self.action_space.high[1] = 600 - if Configuration.instance().dynamic_storage_scheduling: - self.action_space.low[1] = -200 - self.action_space.high[1] = +200 - if ( - Configuration.instance().dynamic_fix_term_pricing - and Configuration.instance().capacity_pricing - ): - self.action_space.low[0] = 0.5 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0 - self.action_space.high[1] = 0.4 - if ( - Configuration.instance().dynamic_fix_term_pricing - and not Configuration.instance().capacity_pricing - ): - 
self.action_space.low[0] = 0.6 - self.action_space.high[0] = 1.5 - if Configuration.instance().dynamic_parking_fee: - self.action_space.low[1] = 0 - self.action_space.high[1] = 1 / 60 - # self.action_space.low[1] = 0.01 - # self.action_space.high[0] = 0.2 - # self.action_space.high[1] = 0.03 - - # self.observation_space = spaces.Box(low=0, high=1000000, shape= - # (config.number_chargers * 3 + 2 + 4, ), dtype=np.float64) - observation_shape = 2 + 3 + 2 - if Configuration.instance().dynamic_storage_scheduling: - observation_shape += 1 - self.observation_space = spaces.Box( - low=0, high=1000000, shape=(observation_shape,), dtype=np.float64 - ) - self.charging_hub = None - self.env = None - self.id = 1 - self.episode = 0 - self.current_step = 0 - self.reward = 0 - self._max_episode_steps = 50000000 + + # Constants + K = 5 # Base for discrete action encoding + MAX_EPISODE_STEPS = 50000000 + + def __init__(self, config: Any, DQN: bool = False, charging_hub: Optional[Any] = None, env: Optional[Any] = None): + """ + Initialize the pricing environment. + + Args: + config: Configuration object containing environment parameters + DQN: Whether to use discrete action space for DQN + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + """ + super().__init__() + self.config = config self.evaluation = config.evaluation - self.total_reward = dict( - missed=0, feasibility=0, energy=0, feasibility_storage=0, test=0 - ) - self.config = config + self.is_dqn = DQN + + # Single reward calculation approach + # No longer using comprehensive rewards - keeping it simple + + # Initialize action and observation spaces + self._init_action_space() + self._init_observation_space() + + # Environment state + self.charging_hub = charging_hub + self.env = env + self.current_step = 0 + self.reward = 0.0 self.action = None + self.final_action_DQN = None + + # Episode tracking + self.episode = 0 + self.total_reward = { + "profit": 0, + "feasibility": 0, + "energy": 0, + "feasibility_storage": 0, + "test": 0 + } + + # Action range for continuous actions if not DQN: self.action_range = [self.action_space.low, self.action_space.high] - - def rescale_action(self, action): + + def _init_action_space(self) -> None: + """Initialize the action space based on configuration.""" + if self.is_dqn: + self._init_discrete_action_space() + else: + self._init_continuous_action_space() + + def _init_discrete_action_space(self) -> None: + """Initialize discrete action space for DQN.""" + self.action_space = spaces.Discrete(self.K ** self.config.number_power_options) + + def _init_continuous_action_space(self) -> None: + """Initialize continuous action space for other algorithms.""" + number_of_actions = self._calculate_number_of_actions() + + self.action_space = spaces.Box( + low=0, + high=self.config.maximum_power, + shape=(number_of_actions,), + dtype=np.float64 + ) + + self._configure_action_space_bounds() + + def _calculate_number_of_actions(self) -> int: + """Calculate the number of actions based on configuration.""" + config = Configuration.instance() + number_of_actions = self.config.number_power_options - 1 + + # Adjust based on pricing features + if config.dynamic_fix_term_pricing and config.capacity_pricing: + number_of_actions = self.config.number_power_options + if config.dynamic_parking_fee: + number_of_actions = self.config.number_power_options + if config.limiting_grid_capa: + number_of_actions = self.config.number_power_options + if config.dynamic_storage_scheduling: + number_of_actions = 
self.config.number_power_options + + return number_of_actions + + def _configure_action_space_bounds(self) -> None: + """Configure action space bounds based on pricing mode.""" + config = Configuration.instance() + + if config.pricing_mode == "Discrete": + self._configure_discrete_mode_bounds() + elif config.pricing_mode == "Continuous": + self._configure_continuous_mode_bounds() + + def _configure_discrete_mode_bounds(self) -> None: + """Configure bounds for discrete pricing mode.""" + action_size = 2 # Default for discrete mode + self.action_space = spaces.Box( + low=0, + high=self.config.maximum_power, + shape=(action_size,), + dtype=np.float64 + ) + + # Set specific bounds for discrete pricing + self.action_space.low[0] = 0.3 + self.action_space.high[0] = 1.5 + self.action_space.low[1] = 0.5 + self.action_space.high[1] = 1.5 + + # Handle additional power options + if self.config.number_power_options >= 3: + self.action_space.low[2] = 300 + self.action_space.high[2] = 800 + if self.config.number_power_options >= 4: + self.action_space.low[3] = -200 + self.action_space.high[3] = 200 + + def _configure_continuous_mode_bounds(self) -> None: + """Configure bounds for continuous pricing mode.""" + config = Configuration.instance() + + # Base bounds + self.action_space.low[0] = 0 + self.action_space.high[0] = 1.5 + + # Adjust based on features + if config.limiting_grid_capa: + self.action_space.low[1] = 300 + self.action_space.high[1] = 600 + if config.dynamic_storage_scheduling: + self.action_space.low[1] = -200 + self.action_space.high[1] = 200 + if config.dynamic_fix_term_pricing and config.capacity_pricing: + self.action_space.low[0] = 0.5 + self.action_space.high[0] = 1.5 + self.action_space.low[1] = 0 + self.action_space.high[1] = 0.4 + if config.dynamic_fix_term_pricing and not config.capacity_pricing: + self.action_space.low[0] = 0.6 + self.action_space.high[0] = 1.5 + if config.dynamic_parking_fee: + self.action_space.low[1] = 0 + self.action_space.high[1] = 1 / 60 + + def _init_observation_space(self) -> None: + """Initialize the observation space.""" + observation_shape = self._calculate_observation_shape() + + self.observation_space = spaces.Box( + low=0, + high=1000000, + shape=(observation_shape,), + dtype=np.float64 + ) + + def _calculate_observation_shape(self) -> int: + """Calculate the observation space shape.""" + base_shape = 2 + 3 + 2 # Time encoding + base features + demand features + + if Configuration.instance().dynamic_storage_scheduling: + base_shape += 1 + + return base_shape + + def rescale_action(self, action: np.ndarray) -> np.ndarray: + """ + Rescale action from normalized range to actual range. + + Args: + action: Normalized action from agent + + Returns: + Rescaled action in actual range + """ return ( action * (self.action_range[1] - self.action_range[0]) / 2.0 + (self.action_range[1] + self.action_range[0]) / 2.0 ) - - def get_final_prices_DQN(self, actions): - + + def get_final_prices_DQN(self, actions: np.ndarray) -> np.ndarray: + """ + Convert DQN actions to final pricing values. 
+ + Args: + actions: Raw actions from DQN agent + + Returns: + Final pricing values + """ final_action = actions.copy() + if len(actions) == 1: - for i in range(len(actions)): - final_action[i] = actions[i] * 0.1 + 0.3 - if len(actions) == 2: + final_action[0] = actions[0] * 0.1 + 0.3 + elif len(actions) == 2: for i in range(len(actions)): final_action[i] = actions[i] * 0.1 + 0.4 * i + 0.2 * (1 - i) + self.final_action_DQN = final_action return final_action - - # def get_state(self, charging_hub=None, env=None): - # state = np.array([]) - # if not env: - # hour = 0 - # hour = np.array(hour) - # # hour = np.eye(24)[hour] - # - # normalized_hour = hour / 24 / 4 - # - # # Map normalized hour to angle in radians - # angle = normalized_hour * 2 * np.pi - # - # # Encode angle using sinusoidal functions - # sin_encoding = np.sin(angle) - # cos_encoding = np.cos(angle) - # day = 0 - # day = np.array(day) - # day = np.eye(5)[day] - # else: - # hour = (env.now%1440 - env.now%charging_hub.planning_interval) / charging_hub.planning_interval - # hour = np.array(int(hour)) - # normalized_hour = hour / 24 / (60/charging_hub.planning_interval) - # - # # Map normalized hour to angle in radians - # angle = normalized_hour * 2 * np.pi - # - # # Encode angle using sinusoidal functions - # sin_encoding = np.sin(angle) - # cos_encoding = np.cos(angle) - # # hour = np.eye(24)[hour] - # - # day = (env.now - env.now % 1440)/1440 - # day = np.array(int(day)) - # day = np.eye(5)[day] - # state = np.append(state, np.array([sin_encoding, cos_encoding])) - # - # # state = np.append(state, np.array([day])) - # if not charging_hub: - # storage_SoC = 0 - # free_grid_capa = 0 - # PV = 0 - # electricity_price = 0 - # peak_usage = 0 - # avg_energy_demand = 0 - # avg_power_demand = 0 - # state = np.append(state, np.array([free_grid_capa, PV, electricity_price, peak_usage])) - # for i in range(self.config.number_chargers): - # for _ in range(4): - # energy_demand = 0 - # charging_id = 0 - # # Time of Departure - # ToD = 0 - # state = np.append(state, np.array([energy_demand, ToD, charging_id])) - # else: - # storage_SoC = charging_hub.electric_storage.SoC - # PV = charging_hub.operator.generation_min - # hour = (env.now % 1440 - env.now % 60) / 60 - # electricity_price = charging_hub.electricity_tariff[int(hour)] - # peak_usage = charging_hub.operator.peak_threshold - # avg_energy_demand = 0 - # avg_power_demand = 0 - # if charging_hub.operator.free_grid_capa_actual == 0: - # free_grid_capa = charging_hub.operator.free_grid_capa_actual - # else: - # free_grid_capa = charging_hub.operator.free_grid_capa_actual[0] - # - # state = np.append(state, np.array([free_grid_capa / 1000, PV / 10, electricity_price, peak_usage / 500])) - # - # for charger in charging_hub.chargers: - # vehicles = charger.connected_vehicles - # charger_state = np.zeros(charger.number_of_connectors * 3) - # for j in range(len(vehicles)): - # charger_state[j * 3 + 0] = vehicles[j].remaining_energy_deficit / 50 - # charger_state[j * 3 + 1] = vehicles[j].remaining_park_duration / 1000 - # charger_state[j * 3 + 2] = vehicles[j].charging_price / 4 - # state = np.append(state, charger_state) - # - # return state - - def get_state(self, charging_hub=None, env=None): + + def get_state(self, charging_hub: Optional[Any] = None, env: Optional[Any] = None) -> np.ndarray: + """ + Get the current state of the environment. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + State vector as numpy array + """ state = np.array([]) + + # Add time encoding + time_encoding = self._get_time_encoding(charging_hub, env) + state = np.append(state, time_encoding) + + # Add system state + system_state = self._get_system_state(charging_hub, env) + state = np.append(state, system_state) + + return state + + def _get_time_encoding(self, charging_hub: Optional[Any], env: Optional[Any]) -> np.ndarray: + """ + Get time encoding for the current state. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + Time encoding vector + """ if not env: - hour = 0 - hour = np.array(hour) - # hour = np.eye(24)[hour] - - normalized_hour = hour / 24 / 4 - - # Map normalized hour to angle in radians - angle = normalized_hour * 2 * np.pi - - # Encode angle using sinusoidal functions - sin_encoding = np.sin(angle) - cos_encoding = np.cos(angle) - day = 0 - day = np.array(day) - day = np.eye(5)[day] + return self._get_default_time_encoding() else: - hour = ( - env.now % 1440 - env.now % charging_hub.planning_interval - ) / charging_hub.planning_interval - hour = np.array(int(hour)) - normalized_hour = hour / 24 / (60 / charging_hub.planning_interval) - - # Map normalized hour to angle in radians - angle = normalized_hour * 2 * np.pi - - # Encode angle using sinusoidal functions - sin_encoding = np.sin(angle) - cos_encoding = np.cos(angle) - # hour = np.eye(24)[hour] - - day = (env.now - env.now % 1440) / 1440 - day = np.array(int(day)) - day = np.eye(5)[day] - state = np.append(state, np.array([sin_encoding, cos_encoding])) - - # state = np.append(state, np.array([day])) + return self._get_simulation_time_encoding(charging_hub, env) + + def _get_default_time_encoding(self) -> np.ndarray: + """Get default time encoding when no simulation is running.""" + hour = 0 + normalized_hour = hour / 24 / 4 + angle = normalized_hour * 2 * np.pi + + sin_encoding = np.sin(angle) + cos_encoding = np.cos(angle) + + return np.array([sin_encoding, cos_encoding]) + + def _get_simulation_time_encoding(self, charging_hub: Any, env: Any) -> np.ndarray: + """ + Get time encoding from simulation. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + Time encoding vector + """ + hour = ( + env.now % 1440 - env.now % charging_hub.planning_interval + ) / charging_hub.planning_interval + hour = int(hour) + + normalized_hour = hour / 24 / (60 / charging_hub.planning_interval) + angle = normalized_hour * 2 * np.pi + + sin_encoding = np.sin(angle) + cos_encoding = np.cos(angle) + + return np.array([sin_encoding, cos_encoding]) + + def _get_system_state(self, charging_hub: Optional[Any], env: Optional[Any]) -> np.ndarray: + """ + Get system state information. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + System state vector + """ if not charging_hub: - storage_SoC = 0 - free_grid_capa = 0 - PV = 0 - electricity_price = 0 - peak_usage = 0 - avg_energy_demand = 0 - avg_power_demand = 0 - # state = np.append(state, np.array([free_grid_capa, PV, electricity_price, peak_usage, avg_energy_demand, avg_power_demand])) - if Configuration.instance().dynamic_storage_scheduling: - state = np.append( - state, - np.array( - [ - storage_SoC, - PV, - electricity_price, - peak_usage, - avg_energy_demand, - avg_power_demand, - ] - ), - ) - else: - state = np.append( - state, - np.array( - [ - PV, - electricity_price, - peak_usage, - avg_energy_demand, - avg_power_demand, - ] - ), - ) + return self._get_default_system_state() + else: + return self._get_simulation_system_state(charging_hub, env) + + def _get_default_system_state(self) -> np.ndarray: + """Get default system state when no simulation is running.""" + default_state = [0, 0, 0, 0, 0, 0] # All zeros for default + + if Configuration.instance().dynamic_storage_scheduling: + return np.array(default_state) + else: + return np.array(default_state[1:]) # Exclude storage SoC + + def _get_simulation_system_state(self, charging_hub: Any, env: Any) -> np.ndarray: + """ + Get system state from simulation. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + System state vector + """ + pricing_state = self._extract_pricing_state(charging_hub, env) + + # Normalize values + normalized_state = [ + pricing_state.storage_soc / 300, + pricing_state.pv_generation / 500, + pricing_state.electricity_price, + pricing_state.peak_usage / 1000, + pricing_state.avg_energy_demand / 1000, + pricing_state.avg_power_demand / 10 + ] + + if Configuration.instance().dynamic_storage_scheduling: + return np.array(normalized_state) else: - storage_SoC = charging_hub.electric_storage.SoC - PV = charging_hub.operator.non_dispatchable_generator.generation_profile_actual.loc[ + return np.array(normalized_state[1:]) # Exclude storage SoC + + def _extract_pricing_state(self, charging_hub: Any, env: Any) -> PricingState: + """ + Extract pricing state from charging hub. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + PricingState object + """ + # Extract storage state + storage_soc = charging_hub.electric_storage.SoC + + # Extract PV generation with error handling + try: + pv_generation = charging_hub.operator.non_dispatchable_generator.generation_profile_actual.loc[ env.now, "pv_generation" ] - hour = (env.now % 1440 - env.now % 60) / 60 - electricity_price = charging_hub.electricity_tariff[int(hour)] - peak_usage = charging_hub.operator.peak_threshold - avg_energy_demand = 0 - avg_power_demand = 0 - if charging_hub.operator.free_grid_capa_actual == 0: - free_grid_capa = charging_hub.operator.free_grid_capa_actual - else: - free_grid_capa = charging_hub.operator.free_grid_capa_actual[0] - - for charger in charging_hub.chargers: - vehicles = charger.connected_vehicles - for j in range(len(vehicles)): - avg_energy_demand += vehicles[j].remaining_energy_deficit - avg_power_demand += ( - vehicles[j].remaining_energy_deficit - / vehicles[j].remaining_park_duration - ) - - # state = np.append(state, np.array([free_grid_capa/1000, PV/500, electricity_price, peak_usage/1000, - # avg_energy_demand/1000, avg_power_demand/10])) - if Configuration.instance().dynamic_storage_scheduling: - state = np.append( - state, - np.array( - [ - storage_SoC / 300, - PV / 500, - electricity_price, - peak_usage / 1000, - avg_energy_demand / 1000, - avg_power_demand / 10, - ] - ), + except KeyError: + # If time index doesn't exist, return 0 (no generation) + pv_generation = 0.0 + + # Extract electricity price + hour = (env.now % 1440 - env.now % 60) / 60 + electricity_price = charging_hub.electricity_tariff[int(hour)] + + # Extract peak usage + peak_usage = charging_hub.operator.peak_threshold + + # Calculate demand metrics + avg_energy_demand, avg_power_demand = self._calculate_demand_metrics(charging_hub) + + # Extract grid capacity + free_grid_capacity = self._extract_grid_capacity(charging_hub) + + return PricingState( + storage_soc=storage_soc, + pv_generation=pv_generation, + electricity_price=electricity_price, + peak_usage=peak_usage, + avg_energy_demand=avg_energy_demand, + avg_power_demand=avg_power_demand, + free_grid_capacity=free_grid_capacity + ) + + def _calculate_demand_metrics(self, charging_hub: Any) -> Tuple[float, float]: + """ + Calculate average energy and power demand. + + Args: + charging_hub: Reference to the charging hub + + Returns: + Tuple of (avg_energy_demand, avg_power_demand) + """ + avg_energy_demand = 0 + avg_power_demand = 0 + + for charger in charging_hub.chargers: + vehicles = charger.connected_vehicles + for vehicle in vehicles: + avg_energy_demand += vehicle.remaining_energy_deficit + avg_power_demand += ( + vehicle.remaining_energy_deficit / vehicle.remaining_park_duration ) + + return avg_energy_demand, avg_power_demand + + def _extract_grid_capacity(self, charging_hub: Any) -> float: + """ + Extract free grid capacity. 
+ + Args: + charging_hub: Reference to the charging hub + + Returns: + Free grid capacity + """ + try: + free_grid_capa = charging_hub.operator.free_grid_capa_actual + if isinstance(free_grid_capa, list): + if len(free_grid_capa) > 0: + return free_grid_capa[0] + else: + return 0.0 else: - state = np.append( - state, - np.array( - [ - PV / 500, - electricity_price, - peak_usage / 1000, - avg_energy_demand / 1000, - avg_power_demand / 10, - ] - ), - ) - - # print(state) - - return state - - def step(self, action, charging_hub=None, env=None): + return free_grid_capa + except Exception as e: + # If we can't access grid capacity, return 0 + return 0.0 + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]: + """ + Take a step in the environment. + + Args: + action: Action to take + + Returns: + Tuple of (observation, reward, done, info) + """ self.current_step += 1 - reward = self._take_action(action, charging_hub, env) - done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + reward = self._take_action(action) + done = self.current_step >= self.MAX_EPISODE_STEPS + obs = self._next_observation() + return obs, reward, done, {} - - def receive_action(self): + + def receive_action(self) -> Optional[np.ndarray]: + """Get the current action.""" return self.action - - def reset(self): - # Reset the state of the environment to an initial state + + def reset(self) -> np.ndarray: + """ + Reset the environment to initial state. + + Returns: + Initial observation + """ self.current_step = 0 self.reward = 0 - # self.state = self.get_state() + if not self.charging_hub: return self.get_state(None, None) return self.get_state(self.charging_hub, self.env) - - def render(self, mode="human", close=False): - print(self.reward) - - def _take_action(self, action, charging_hub=None, env=None): - - reward = 0 - # hour = int((self.env.now % 1440) / 60) - # prices = [0,0] - # DQN_pricing = convert_to_vector(action) - # for i in range(len(DQN_pricing)): - # DQN_pricing[i] = DQN_pricing[i] * 0.2 * (i + 1) - # for i in range(2): - # prices[i] = Configuration.instance().prices[i] - hour / 4 / 20 - # reward -= (prices[i] - DQN_pricing[i])**2 - - reward -= charging_hub.reward["missed"] - # reward -= charging_hub.reward['feasibility_storage'] * 0.1 - self.total_reward["missed"] += reward - - charging_hub.reward["missed"] = 0 - charging_hub.reward["feasibility_storage"] = 0 - charging_hub.reward["feasibility"] = 0 - return reward / 100 - - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + + def render(self, mode: str = "human", close: bool = False) -> None: + """Render the environment state.""" + if mode == "human": + print(f"Pricing Environment - Reward: {self.reward}") + + def _take_action(self, action: np.ndarray) -> float: + """ + Execute the action and calculate reward. + + Args: + action: Action to execute + + Returns: + Reward value + """ + # Store the action for the simulation to use + self.action = action + + # Calculate reward using the single reward calculation method + reward = self._calculate_reward(action) + + return reward + + + + def _calculate_reward(self, action: np.ndarray) -> float: + """ + Calculate reward using the simulation-based approach with operator.reward_computing(). + + This method uses the operator's reward computation as the primary source + and includes additional reward components for better learning. 
+ + Args: + action: Action to execute + + Returns: + float: Calculated reward value + """ + reward = 0.0 + + if self.charging_hub: + # 1. OPERATOR REWARD COMPUTING (primary reward source) + # Use the operator's reward_computing method which includes: + # - Peak threshold violation penalties + # - Objective function change rewards + if hasattr(self.charging_hub, 'operator') and self.charging_hub.operator: + try: + operator_reward = self.charging_hub.operator.reward_computing() + reward += operator_reward + except Exception as e: + print(f"Operator reward computing failed: {e}") + + # 2. MISSED-ENERGY PENALTY (accumulated by the simulation under the 'profit' key of charging_hub.reward) + profit = self.charging_hub.reward.get("profit", 0) + reward -= profit + self.total_reward["profit"] += reward + + # Reset rewards for next step + self.charging_hub.reward["profit"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 + + # 3. FALLBACK REWARD (ensure non-zero rewards for learning) + if reward == 0.0: + reward = 0.01 + + # Note: step() already advances self.current_step; no extra bookkeeping is needed here + + return reward + + def _next_observation(self) -> np.ndarray: + """ + Get the next observation. + + Returns: + Next observation + """ + return self.get_state(self.charging_hub, self.env) -def convert_to_scalar(a): - # print(a) +def convert_to_scalar(action_vector: np.ndarray) -> int: + """ + Convert action vector to scalar for discrete actions. + + Args: + action_vector: Vector of actions + + Returns: + Scalar action value + """ action = 0 for i in range(2): - action += a[i] * (5) ** (1 - i) - # print(action) + action += action_vector[i] * (5) ** (1 - i) return int(action) -def convert_to_vector(a, h=1): - # print(a) +def convert_to_vector(scalar_action: int, h: int = 1) -> np.ndarray: + """ + Convert scalar action to vector for discrete actions. + + Args: + scalar_action: Scalar action value + h: Height parameter for conversion + + Returns: + Vector of actions + """ action = np.zeros(2) j = 0 + for i in range(2): - action[i] = int((a - a % (k ** (h - j))) / (k ** (h - j))) - a = a % (k ** (h - j)) + action[i] = int((scalar_action - scalar_action % (5 ** (h - j))) / (5 ** (h - j))) + scalar_action = scalar_action % (5 ** (h - j)) j += 1 - # print(action) + return action diff --git a/utilities/training_manager.py b/utilities/training_manager.py new file mode 100644 index 0000000..68ce0b9 --- /dev/null +++ b/utilities/training_manager.py @@ -0,0 +1,161 @@ +""" +Training Manager Module + +This module handles training operations for learnable agents in the EVCC simulation framework. +""" + +from typing import Optional, List +from resources.configuration.configuration import Configuration +from utilities.sim_input_processing import sample_week +from run_simulation import run_single_simulation +import pandas as pd + + +def run_learnable_agent_training(agent_type: str, agent, decision_type: str, config: Configuration) -> None: + """ + Run training for learnable agents (RL agents).
+ + Args: + agent_type: Type of agent (RL_SAC, RL_DQN, RL_DDPG) + agent: The agent instance to train + decision_type: Type of decision (pricing, charging, storage, routing) + config: Configuration instance + """ + print(f"Starting training for {decision_type} agent: {agent_type}") + + # Check if hyperparameter tuning is enabled + enable_hyperparameter_tuning = getattr(config, 'enable_hyperparameter_tuning', False) + + if enable_hyperparameter_tuning and decision_type == "pricing": + print("Hyperparameter tuning enabled - running find_best_parameters()") + from utilities.hyperparameter_tuner import find_best_parameters + find_best_parameters(agent, config) + else: + print("Running standard training...") + run_standard_training(agent, decision_type, config) + + print(f"Training completed for {decision_type} agent: {agent_type}") + + +def run_standard_training(agent, decision_type: str, config: Configuration, return_rewards: bool = False): + """ + Run standard training for the agent. + + Args: + agent: The agent instance to train + decision_type: Type of decision (pricing, charging, storage, routing) + config: Configuration instance + return_rewards: Whether to return reward history for hyperparameter tuning + + Returns: + List of rewards if return_rewards=True, otherwise None + """ + print(f"Running standard training for {decision_type} agent...") + + # Training parameters + NUMBER_EPISODES = 301 + if config.pricing_mode == "perfect_info": + NUMBER_EPISODES = 1 + + training_results = pd.DataFrame([]) + episode = 1 + output = [] + + while episode <= NUMBER_EPISODES: + # Sample training week + START = sample_week( + sim_seasons=config.SIM_SEASON, + summer_start=config.SUMMER_START, + summer_end=config.SUMMER_END, + seed=42, + ) + print(f"Episode {episode}: Training on week starting {START}") + + # Set evaluation mode + evaluation_episodes = 10 + time_to_learn = agent.hyperparameters.get("min_steps_before_learning", 1000) + + if config.evaluation_after_training: + evaluation_episodes = 1 + time_to_learn = 0 + + # Charger configuration + chargers = { + "fast_one": config.facility_size, + "fast_two": 0, + "fast_four": 0, + "slow_one": 0, + "slow_two": 0, + "slow_four": 0, + } + + # Check if evaluation is needed + if (episode % evaluation_episodes == 0 and + hasattr(agent, 'global_step_number') and + agent.global_step_number >= time_to_learn): + + agent.do_evaluation_iterations = True + print(f"Episode {episode}: Running evaluation") + else: + agent.do_evaluation_iterations = False + + # Run simulation + try: + # During training, we don't want to save results every episode + # Only save during evaluation episodes or if explicitly requested + is_evaluation_episode = (episode % evaluation_episodes == 0 and + hasattr(agent, 'global_step_number') and + agent.global_step_number >= time_to_learn) + + # Prepare results parameters only for evaluation episodes + results_params = None + if is_evaluation_episode: + results_params = [f"{getattr(config, 'POST_FIX', 'sim')}", f"state{9}", f"week{episode}"] + + df = run_single_simulation( + charging_agent=None, # Will be set by the simulation + storage_agent=None, # Will be set by the simulation + pricing_agent=agent, + num_charger=chargers, + turn_off_monitoring=False, + turn_on_results=results_params, # Only save results during evaluation + turn_on_plotting=is_evaluation_episode, # Only plot during evaluation + transformer_num=config.TRANSFORMER_NUM, + storage_capa=config.STORAGE_SIZE, + pv_capa=config.PV_INSTALLED_CAPA, + year=9, + start_day=START, + ) + + # 
Update learning rate if supported + if hasattr(agent, 'update_lr'): + agent.update_lr(new_objective=df["profit"], episode=episode) + + # Save training results only during evaluation episodes and if enabled + if (is_evaluation_episode and + not config.evaluation_after_training and + getattr(config, 'save_training_results', False)): + training_results = pd.concat([training_results, df]) + training_results.to_csv( + f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}.csv' + ) + + output.append(df["profit"].values[0]) + + # Print progress + if episode % 10 == 0: + print(f"Episode {episode}: Profit = {df['profit'].values[0]:.2f}") + + except Exception as e: + print(f"Error in episode {episode}: {e}") + output.append(0) # Default value on error + + episode += 1 + if hasattr(agent, 'episode_number'): + agent.episode_number += 1 + + print(f"Standard training completed for {decision_type} agent") + + if return_rewards: + return output[9:-1:10][-10:] if len(output) > 20 else output + return None
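
Note on the discrete action encoding: the refactor replaces the old module-level `k = 5` with the hard-coded base 5 inside `convert_to_scalar`/`convert_to_vector` and the `PricingEnv.K` class constant, so the DQN action space `spaces.Discrete(K ** number_power_options)` still treats an action index as two base-5 digits. A minimal round-trip check against the refactored functions (illustrative only, not part of the diff):

import numpy as np
from utilities.rl_environments.rl_pricing_env import convert_to_scalar, convert_to_vector

# 13 = 2 * 5 + 3, so the two base-5 digits are [2, 3]
assert np.array_equal(convert_to_vector(13), np.array([2.0, 3.0]))
assert convert_to_scalar(np.array([2, 3])) == 13

# Exhaustive round trip over the 25 indices of a two-option Discrete(5 ** 2) space
for scalar in range(25):
    assert convert_to_scalar(convert_to_vector(scalar)) == scalar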
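
Note on exercising the new Gym wrappers: the factory functions and the optional `gym.make()` registrations added in `evch_gym_env.py` can be smoke-tested with a random policy along the lines of the sketch below. This is a sketch only: it assumes the repository is importable, that `EVCHConfig` accepts `use_comprehensive_rewards` as a constructor keyword (the attribute is read as `self.config.use_comprehensive_rewards` in this diff, but the constructor itself sits outside these hunks), and that `EVCHGymEnv.step` follows the same 4-tuple Gym API as `PricingEnv.step`.

# Illustrative smoke test; importing the module also runs the optional gym registration
from utilities.rl_environments.evch_gym_env import make_pricing_env

config_dict = {"use_comprehensive_rewards": False}  # assumed constructor keyword; other keys omitted

env = make_pricing_env(config_dict)
# Equivalent, if the optional registration succeeded:
# import gym; env = gym.make("EVCH-Pricing-v0", config_dict=config_dict)

obs = env.reset()
episode_return = 0.0
for _ in range(24):  # roughly one simulated day at a 60-minute planning interval
    action = env.action_space.sample()           # random policy, smoke test only
    obs, reward, done, info = env.step(action)   # assumed 4-tuple (old Gym) API
    episode_return += reward
    if done:
        break
print("episode return:", episode_return)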