diff --git a/.gitignore b/.gitignore index f0c94b0..5fcbd95 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,8 @@ __pycache__/ /Results/raw_output/ /Cache/ /new_notebooks_EVCH/ +/examples/ +/docs/ # Root level data files chargingdata.csv diff --git a/Learning_Pricing.py b/Learning_Pricing.py deleted file mode 100644 index 2d3032c..0000000 --- a/Learning_Pricing.py +++ /dev/null @@ -1,188 +0,0 @@ -from resources.configuration.configuration import Configuration -from utilities.rl_agents.agents.actor_critic_agents.SAC import ( - SAC, -) -from resources.configuration.SAC_configuration import ( - config, - pricing_config, -) -from utilities.rl_environments.rl_pricing_env import PricingEnv -from utilities.sim_input_processing import sample_week -from resources.logging.log import lg -import pandas as pd -import numpy as np - -from run_simulation import run_single_simulation - -SIM_SEASON = Configuration.instance().SIM_SEASON -SUMMER_START = Configuration.instance().SUMMER_START -SUMMER_END = Configuration.instance().SUMMER_END -POST_FIX = Configuration.instance().POST_FIX - -Configuration.instance().dynamic_pricing = True - -evaluate_after_training = Configuration.instance().evaluation_after_training -number_of_chargers = 200 -PV_CAPA = Configuration.instance().PV -STORAGE_CAPA = 0 -max_cap = 50 -max_grid_usage = 2000 -TRANSFORMER_NUM = Configuration.instance().grid - -# config.number_chargers = number_of_chargers -# config.maximum_power = max_cap -# config.maximum_grid_usage = max_grid_usage -# config.environment = ChargingHubInvestmentEnv(config=config) -# config.learnt_network = evaluate_after_training -# agent = SAC(config) -agent = None - -# storage_config.number_chargers = 80 -# storage_config.maximum_power = 50 -# storage_config.maximum_grid_usage = 200 -# storage_config.environment = StorageEnv(config=storage_config) -# storage_config.learnt_network = False -# storage_agent = SAC(storage_config) - -def run_experiments(): - pricing_config.number_chargers = number_of_chargers - pricing_config.maximum_power = max_cap - pricing_config.maximum_grid_usage = max_grid_usage - pricing_config.number_power_options = len(Configuration.instance().energy_prices) + 0 - pricing_config.environment = PricingEnv(config=pricing_config, DQN=False) - pricing_config.learnt_network = evaluate_after_training - pricing_config.evaluation_after_training = evaluate_after_training - - pricing_agent = SAC(pricing_config) - training_results = pd.DataFrame([]) - episode = 1 - NUMBER_EPISODES = 301 - if Configuration.instance().pricing_mode == "perfect_info": - NUMBER_EPISODES = 1 - output = [] - while episode <= NUMBER_EPISODES: - START = sample_week( - sim_seasons=SIM_SEASON, - summer_start=SUMMER_START, - summer_end=SUMMER_END, - seed=42, - ) - print(START) - # week = random.sample(TRAIN_WEEKS, 1) - # week = START - results = None - off_monitoring = True - evaluation_episodes = 10 - time_to_learn = pricing_agent.hyperparameters["min_steps_before_learning"] - if evaluate_after_training: - off_monitoring = False - evaluation_episodes = 1 - results = [f"{POST_FIX}", f"state{9}", f"week{1}"] - time_to_learn = 0 - # chargers = {'fast_one': 5, 'fast_two': 40, 'fast_four': 5, 'slow_one': 5, 'slow_two': 10, 'slow_four': 0} - chargers = { - "fast_one": number_of_chargers, - "fast_two": 0, - "fast_four": 0, - "slow_one": 0, - "slow_two": 0, - "slow_four": 0, - } - lg.error(f"episode: {episode}", extra={"clazz": "", "oid": ""}) - if ( - episode % evaluation_episodes == 0 - and pricing_agent.global_step_number >= 
time_to_learn - ): - if agent: - agent.do_evaluation_iterations = True - pricing_agent.do_evaluation_iterations = True - ### activate when we have separate battery agent - # storage_agent.do_evaluation_iterations = True - df = run_single_simulation( - charging_agent=agent, - storage_agent=None, - pricing_agent=pricing_agent, - num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START, - ) - pricing_agent.update_lr(new_objective=df["profit"], episode=episode) - print( - pricing_agent.alpha, - pricing_agent.learning_rate_actor, - max(pricing_agent.objective_function), - pricing_agent.hyperparameters["Critic"]["tau"], - pricing_agent.hyperparameters["batch_size"], - pricing_agent.action_size, - ) - else: - if agent: - agent.do_evaluation_iterations = False - pricing_agent.do_evaluation_iterations = False - ### activate when we have separate battery agent - # storage_agent.do_evaluation_iterations = False - df = run_single_simulation( - charging_agent=agent, - storage_agent=None, - pricing_agent=pricing_agent, - num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START, - ) - - episode += 1 - pricing_agent.episode_number += 1 - if not Configuration.instance().evaluation_after_training: - training_results = pd.concat([training_results, df]) - training_results.to_csv(Configuration.instance().OUTPUT_DATA_PATH+ - f"training_results_{pricing_agent.config.name}.csv" - ) - output.append(df["profit"].values[0]) - # print(output) - return output[9:-1:10][-10:] - - -def find_best_parameters(): - try: - training_results = pd.read_csv(f'training_results_{config.path}.csv') - except: - training_results = pd.DataFrame(columns=['learning_rate', 'batch_size', 'tau', 'result']) - training_dict = {} - best_results = -10000000000 - best_parameters = {'learning_rate': 0, 'batch_size': 0, 'tau': 0} - for lr in [5e-5, 1e-4, 5e-4, 1e-3]: - for bs in [64, 256, 512]: - for tau in [0.05, 0.1]: - pricing_config.hyperparameters['batch_size'] = bs - pricing_config.hyperparameters['Actor']['learning_rate'] = lr - pricing_config.hyperparameters['Critic']['learning_rate'] = lr - pricing_config.hyperparameters['Actor']['tau'] = tau - pricing_config.hyperparameters['Critic']['tau'] = tau - pricing_config.hyperparameters['min_steps_before_learning'] = max(bs, 256) - mean_reward = run_experiments() - # print('Mean reward: ', mean_reward) - hyperparameters = {'learning_rate': lr, 'batch_size': bs, 'tau': tau} - if np.array(mean_reward).mean() > best_results: - best_results = np.array(mean_reward).mean() - best_parameters = hyperparameters - results_dict = {'result': mean_reward} - training_results = pd.concat( - [pd.DataFrame([[lr, bs, tau, mean_reward]], columns=training_results.columns), - training_results], ignore_index=True) - print(f'{hyperparameters}, {results_dict}, best: {best_results}, best_parameters: {best_parameters}') - training_results.to_csv(f'{Configuration.instance().OUTPUT_DATA_PATH}training_results_{pricing_config.name}_tuning.csv', index=False) - -# find_best_parameters() -run_experiments() \ No newline at end of file diff --git a/README.md b/README.md index 0c730d8..4437abf 100644 --- a/README.md +++ b/README.md @@ -7,20 +7,181 @@ EVCCs are expected to become a core component of the future charging 
portfolio o This simulation is intended to explore these interdependencies through extensive sensitivity testing and through testing new algorithms and models for sizing and operating EVCCs. The module structure is as follows: -## Module structure +## πŸ—οΈ Architecture + +The EVCC simulation framework is built with a modular, decoupled architecture that separates concerns and enables easy integration with different RL algorithms and libraries. + +### High-Level Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ EVCC Simulation β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Preferences β”‚ β”‚Infrastructureβ”‚ β”‚ Operations β”‚ β”‚ Results β”‚ β”‚ +β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ Module β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ RL Agent Integration β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ RL Library β”‚ β”‚ Gym Adapter β”‚ β”‚ EVCH Gym β”‚ β”‚ +β”‚ β”‚ (Stable-Bas3, │───▢│ (Standard │───▢│ Environment β”‚ β”‚ +β”‚ β”‚ RLlib, etc.) 
β”‚ β”‚ Interface) β”‚ β”‚ (Wrapper) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Module Structure + The following modules are included: + - **`Preferences` Module:** Initializes vehicle objects with respective charging and parking preferences (i.e., requests) based on empirical data - **`Infrastructure` Module:** Initializes infrastructure objects (EV supply equipment (EVSE), connectors per each EVSE, grid connection capacity, on-site storage and on-site generation (PV)) -- **`Operations` Module:** Conatain algorithms for assigning physical space (vehicle routing) and electrical capacity (vehicle charging) to individual vehicle objects based on a pre-defined charging policy +- **`Operations` Module:** Contains algorithms for assigning physical space (vehicle routing) and electrical capacity (vehicle charging) to individual vehicle objects based on a pre-defined charging policy - **`Results` Module:** Monitors EVCC activity in pre-defined intervals and accounts costs. Includes plotting routines. +## πŸ†• NEW: Agent Decision System + +The framework now includes a **unified agent decision system** that ensures **ALL decisions** in the EV charging operations are made by agents (RL agents, rule-based agents, algorithm agents, etc.) rather than being hardcoded in business logic. + +### Agent Decision System Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Agent Decision System β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ +β”‚ β”‚ Decision β”‚ β”‚ Decision β”‚ β”‚ Decision β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Agent Decision System β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ RL SAC 
β”‚ β”‚ Rule-Based β”‚ β”‚ Algorithm β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Supported Decision Types + +- **PRICING**: Energy prices, parking fees, dynamic pricing strategies +- **CHARGING**: Charging power allocation, schedules, priority assignment +- **STORAGE**: Energy storage operations, peak shaving, arbitrage +- **ROUTING**: Vehicle routing, parking allocation, queue management +- **VEHICLE_ASSIGNMENT**: Charging station assignment, connector allocation +- **PARKING_ALLOCATION**: Parking space allocation, duration optimization +- **GRID_MANAGEMENT**: Grid capacity management, load balancing +- **DEMAND_FORECASTING**: Energy demand prediction, load forecasting + +### Supported Agent Types + +- **RL_SAC**: Soft Actor-Critic reinforcement learning agent +- **RL_DQN**: Deep Q-Network reinforcement learning agent +- **RL_DDPG**: Deep Deterministic Policy Gradient agent +- **RULE_BASED**: Rule-based agents with predefined strategies +- **HEURISTIC**: Algorithm agents that wrap existing algorithms +- **OPTIMIZATION**: Mathematical optimization algorithms +- **ML_MODEL**: Machine learning models (neural networks, etc.) + +### Algorithm Agents + +The system includes **algorithm agents** that wrap all existing charging, routing, and storage algorithms: + +#### **Charging Algorithm Agents** +- `uncontrolled`, `first_come_first_served`, `earliest_deadline_first` +- `least_laxity_first`, `equal_sharing`, `online_myopic` +- `online_multi_period`, `integrated_storage`, `perfect_info` +- `perfect_info_with_storage` + +#### **Routing Algorithm Agents** +- `random`, `lowest_occupancy_first`, `fill_one_after_other` +- `lowest_utilization_first`, `matching_supply_demand`, `minimum_power_requirement` + +#### **Storage Algorithm Agents** +- `uncontrolled`, `temporal_arbitrage`, `peak_shaving` + +### Rule-Based Agents + +Pre-built rule-based agents for common strategies: + +#### **Pricing Agents** +- **Time-of-Use**: Peak/off-peak pricing based on time +- **Demand-Based**: Dynamic pricing based on current demand +- **Cost-Plus**: Fixed markup over base electricity cost + +#### **Charging Agents** +- **First-Come-First-Served**: Serve vehicles in arrival order +- **Priority-Based**: Prioritize vehicles by energy deficit and departure time +- **Load Balancing**: Distribute power evenly among vehicles + +#### **Storage Agents** +- **Peak Shaving**: Discharge during high load, charge during low load +- **Arbitrage**: Charge during low-price hours, discharge during high-price hours +- **Grid Support**: Support grid frequency stability + +### RL Agent Architecture + +The simulation supports complete decoupling of RL agents through a standardized gym-like interface: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ RL Agent Services β”‚ 
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ Service β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Pricing β”‚ β”‚ Charging β”‚ β”‚ Storage β”‚ β”‚ +β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Supported RL Libraries + +The framework supports integration with any gym-compatible RL library: + +- **Stable Baselines3**: SAC, PPO, DQN, A2C, TD3 +- **RLlib**: Distributed training, hyperparameter tuning +- **Custom Agents**: Any agent implementing the gym interface +- **Vectorized Environments**: Support for parallel training + +### Key Design Principles + +1. **Separation of Concerns**: RL logic is completely separated from simulation logic +2. **Standardized Interfaces**: All agents conform to gym-compatible interfaces +3. **Modularity**: Each service (pricing, charging, storage) is independent +4. **Extensibility**: Easy to add new RL algorithms or modify existing ones +5. **Scalability**: Support for distributed training and vectorized environments +6. **Agent-First Design**: All decisions go through agents, no hardcoded logic +7. **Backward Compatibility**: Existing algorithms are preserved and wrapped as agents +8. **Comprehensive Tracking**: Every decision is logged and can be monitored + ## πŸ“¦ Installation This project uses [`uv`](https://github.com/astral-sh/uv), a modern and ultra-fast Python package manager compatible with pip. ### 1. Install `uv` -If you don’t have `uv` installed, run: +If you don't have `uv` installed, run: ### Installation Steps @@ -37,4 +198,144 @@ source .venv/bin/activate # On macOS/Linux .venv\Scripts\activate # On Windows # Step 4: Install dependencies -uv pip install -r requirements.uv.txt \ No newline at end of file +uv pip install -r requirements.uv.txt +``` + +## πŸš€ Quick Start + +### Running the Simulation + +The main entry point for the simulation is `main.py`. 
You can run it with different configuration files: + +```bash +# Run with default configuration +python main.py resources/configuration/ini_files/app-remote.ini + +# Run with custom configuration +python main.py path/to/your/config.ini +``` + +### Configuration Files + +The simulation uses INI configuration files to set parameters for: +- Environment settings (seasons, duration, facility size) +- Infrastructure (chargers, grid capacity, storage, PV) +- Agent types and strategies +- Logging and monitoring options + +### Example Configuration + +```ini +[AGENT_DECISION_SYSTEM] +enabled = True +pricing_agent_type = RULE_BASED +charging_agent_type = HEURISTIC +enable_hyperparameter_tuning = False + +[SETTINGS] +log_level = INFO +facility_size = 200 +``` + +## πŸ“š Documentation + +- **[Agent Decision System Guide](docs/agent_decision_system.md)**: Comprehensive guide to the new agent system +- **[Decision Request System](docs/decision_request_system.md)**: Details about the underlying decision tracking system +- **[Algorithm Agents](utilities/rl_agents/algorithm_agents.py)**: Documentation of algorithm agents +- **[Rule-Based Agents](utilities/rl_agents/rule_based_agents.py)**: Documentation of rule-based agents + +## πŸ”§ Examples + +- **[Agent Decision System Example](examples/agent_decision_system_example.py)**: Basic usage examples +- **[Algorithm Agents Example](examples/algorithm_agents_example.py)**: Using algorithm agents +- **[Decision Request Example](examples/decision_request_example.py)**: Decision tracking examples + +## 🎯 Key Benefits + +### 1. **Consistency** +- All decisions follow the same pattern +- Standardized interfaces and data structures +- Consistent error handling and logging + +### 2. **Modularity** +- Easy to add new agent types +- Simple to switch between different strategies +- Clear separation of concerns + +### 3. **Testability** +- Agents can be tested independently +- Mock agents for unit testing +- Easy to compare different strategies + +### 4. **Observability** +- Every decision is tracked and logged +- Performance metrics for all agents +- Decision history for analysis + +### 5. **Flexibility** +- Support for multiple agent types +- Easy to implement new strategies +- Can mix different agent types + +### 6. **Maintainability** +- Clear agent interfaces +- Well-documented decision types +- Easy to understand and modify + +### 7. **Backward Compatibility** +- Existing algorithms are preserved as agents +- No need to rewrite existing code +- Gradual migration path + +## πŸ”„ Migration Guide + +To migrate existing code to use the agent decision system: + +1. **Identify Decision Points**: Find all places where decisions are made +2. **Create Agents**: Implement agents for each decision type +3. **Register Agents**: Register agents with the system +4. **Replace Decision Logic**: Replace hardcoded logic with agent calls +5. 
**Test and Monitor**: Verify behavior and monitor performance + +### Migration from Existing Algorithms + +```python +# Before: Direct algorithm call +first_come_first_served( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=500, + free_grid_capacity=300, + planning_period_length=15 +) + +# After: Using algorithm agent +charging_agent = AlgorithmChargingAgent(algorithm="first_come_first_served") +context = { + "env": env, + "charging_stations": charging_stations, + "charging_capacity": 500, + "free_grid_capacity": 300, + "planning_period_length": 15 +} +decision = charging_agent.select_action(vehicles, context) +``` + +## πŸš€ Future Enhancements + +- **Multi-Agent Coordination**: Agents that can coordinate with each other +- **Adaptive Agents**: Agents that can switch strategies based on performance +- **Distributed Agents**: Support for distributed agent deployment +- **Advanced Analytics**: More sophisticated performance analysis +- **Agent Marketplace**: Repository of pre-built agents for common use cases +- **Algorithm Performance Comparison**: Tools to compare different algorithms +- **Hybrid Agents**: Agents that combine multiple strategies + +## 🀝 Contributing + +We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details. + +## πŸ“„ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning b/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning index 63a06da..be3199f 100644 Binary files a/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning and b/data/output/actor_pricing_double_capa_500_2_average_power_m_200_m_post_tuning differ diff --git a/main.py b/main.py index 1df01fe..7fd0896 100644 --- a/main.py +++ b/main.py @@ -1,51 +1,142 @@ -# Executes full simulation routine +# Executes full simulation routine with integrated agent decision system +# Supports both learnable agents (RL agents) and non-learnable agents (rule-based, algorithm agents) + from resources.configuration.configuration import Configuration -from utilities.sim_input_processing import sample_week from run_simulation import run_single_simulation +from simulation.operations.agent_decision_system import agent_decision_system +from utilities.rl_agents.interfaces import DecisionType +from utilities.agent_factory import create_agent, is_agent_learnable, get_agent_configuration +from utilities.training_manager import run_learnable_agent_training +from resources.logging.log import lg -# Change working directory to path of run.py - def run_experiments(): - number_of_chargers = 200 - PV_CAPA = Configuration.instance().PV - STORAGE_CAPA = 0 - max_cap = 50 - max_grid_usage = 2000 - TRANSFORMER_NUM = Configuration.instance().grid - START = sample_week( - sim_seasons=Configuration.instance().SIM_SEASON, - summer_start=Configuration.instance().SUMMER_START, - summer_end=Configuration.instance().SUMMER_END, - seed=42, - ) - print(START) - # week = random.sample(TRAIN_WEEKS, 1) - # week = START - results = None - results = [f"{Configuration.instance().POST_FIX}", f"state{9}", f"week{1}"] - chargers = { - "fast_one": number_of_chargers, - "fast_two": 0, - "fast_four": 0, - "slow_one": 0, - "slow_two": 0, - "slow_four": 0, - } - - run_single_simulation( - charging_agent=None, - storage_agent=None, - pricing_agent=None, - 
num_charger=chargers, - turn_off_monitoring=False, - turn_on_results=results, - turn_on_plotting=True, - transformer_num=TRANSFORMER_NUM, - storage_capa=STORAGE_CAPA, - pv_capa=PV_CAPA, - year=9, - start_day=START) + """ + Main experiment runner that orchestrates the simulation. + + This function: + 1. Creates and configures agents based on configuration + 2. Registers agents with the decision system + 3. Runs training for learnable agents or single simulation for non-learnable agents + """ + config = Configuration.instance() + + print("Starting EVCC simulation with agent decision system...") + + # Get agent configuration + agent_config = get_agent_configuration(config) + + # Create and register agents + agents = {} + learnable_agents = {} + # Create pricing agent if configured + if agent_config["pricing"]["agent_type"]: + pricing_agent = create_agent( + "pricing", + agent_config["pricing"]["agent_type"], + strategy=agent_config["pricing"]["strategy"] + ) + agents["pricing"] = pricing_agent + agent_decision_system.register_agent(DecisionType.PRICING, pricing_agent) + print(f"Created pricing agent: {pricing_agent.__class__.__name__} with strategy: {agent_config['pricing']['strategy']}") + + # Check if this is a learnable agent + if is_agent_learnable(agent_config["pricing"]["agent_type"]): + learnable_agents["pricing"] = pricing_agent + print(f"Learnable agent detected: pricing - {pricing_agent.__class__.__name__}") + + # Create charging agent if configured + if agent_config["charging"]["agent_type"]: + charging_agent = create_agent( + "charging", + agent_config["charging"]["agent_type"], + algorithm=agent_config["charging"]["algorithm"], + strategy=agent_config["charging"]["strategy"] + ) + agents["charging"] = charging_agent + agent_decision_system.register_agent(DecisionType.CHARGING, charging_agent) + print(f"Created charging agent: {charging_agent.__class__.__name__} with strategy: {agent_config['charging']['strategy']}") + + if is_agent_learnable(agent_config["charging"]["agent_type"]): + learnable_agents["charging"] = charging_agent + print(f"Learnable agent detected: charging - {charging_agent.__class__.__name__}") + + # Create storage agent if configured + if agent_config["storage"]["agent_type"]: + storage_agent = create_agent( + "storage", + agent_config["storage"]["agent_type"], + algorithm=agent_config["storage"]["algorithm"], + strategy=agent_config["storage"]["strategy"] + ) + agents["storage"] = storage_agent + agent_decision_system.register_agent(DecisionType.STORAGE, storage_agent) + print(f"Created storage agent: {storage_agent.__class__.__name__} with strategy: {agent_config['storage']['strategy']}") + + if is_agent_learnable(agent_config["storage"]["agent_type"]): + learnable_agents["storage"] = storage_agent + print(f"Learnable agent detected: storage - {storage_agent.__class__.__name__}") + + # Create routing agent if configured + if agent_config["routing"]["agent_type"]: + routing_agent = create_agent( + "routing", + agent_config["routing"]["agent_type"], + algorithm=agent_config["routing"]["algorithm"], + strategy=agent_config["routing"]["strategy"] + ) + agents["routing"] = routing_agent + agent_decision_system.register_agent(DecisionType.ROUTING, routing_agent) + print(f"Created routing agent: {routing_agent.__class__.__name__} with strategy: {agent_config['routing']['strategy']}") + + if is_agent_learnable(agent_config["routing"]["agent_type"]): + learnable_agents["routing"] = routing_agent + print(f"Learnable agent detected: routing - 
{routing_agent.__class__.__name__}") + + # Enable dynamic pricing if using learnable pricing agents + if (agent_config["pricing"]["agent_type"] and + is_agent_learnable(agent_config["pricing"]["agent_type"])): + config.dynamic_pricing = True + print(f"Enabled dynamic pricing for learnable agent: {agent_config['pricing']['agent_type']}") + + # Run experiments based on agent types + if learnable_agents: + print(f"\nFound {len(learnable_agents)} learnable agents. Running training...") + + # Run training for each learnable agent + for decision_type, agent in learnable_agents.items(): + agent_type = agent_config[decision_type]["agent_type"] + run_learnable_agent_training(agent_type, agent, decision_type, config) + + else: + print("\nNo learnable agents detected. Running single simulation...") + + # Run single simulation with non-learnable agents + # Use a default start day if not specified + default_start_day = "2019-05-20" # Default Monday in May 2019 + + # Prepare results parameters for single simulation + results_params = [f"{getattr(config, 'POST_FIX', 'sim')}", f"state{9}", f"week{1}"] + + run_single_simulation( + charging_agent=agents.get("charging"), + storage_agent=agents.get("storage"), + pricing_agent=agents.get("pricing"), + num_charger={"fast_one": config.facility_size, "fast_two": 0, "fast_four": 0, + "slow_one": 0, "slow_two": 0, "slow_four": 0}, + turn_off_monitoring=False, + turn_on_results=results_params, # Pass list instead of boolean + turn_on_plotting=True, + transformer_num=config.TRANSFORMER_NUM, + storage_capa=config.STORAGE_SIZE, + pv_capa=config.PV_INSTALLED_CAPA, + year=9, + start_day=default_start_day + ) + + print("Simulation completed successfully!") + -run_experiments() \ No newline at end of file +if __name__ == "__main__": + run_experiments() \ No newline at end of file diff --git a/resources/configuration/configuration.py b/resources/configuration/configuration.py index b910fc1..135d1d5 100644 --- a/resources/configuration/configuration.py +++ b/resources/configuration/configuration.py @@ -42,7 +42,7 @@ def __init__(self): # if self.benchmarking: # self.peak_cost = 0 self.remove_low_request_EVs = False - self.evaluation_after_training = True + self.evaluation_after_training = False self.demand_threshold = 0 self.duration_threshold = 1000000 self.request_adjusting_mode = "Continuous" #'Discrete, Continuous' @@ -115,6 +115,53 @@ def __init__(self): self.lower_base_power = 0 self.higher_base_power = 0 + # Agent Decision System Configuration (defaults) + # ============================================= + self.use_agent_decision_system = True # Enable the new agent decision system + self.default_agent_types = { + "pricing": "RULE_BASED", # Default pricing agent type + "charging": "HEURISTIC", # Default charging agent type (algorithm wrapper) + "storage": "HEURISTIC", # Default storage agent type (algorithm wrapper) + "routing": "HEURISTIC", # Default routing agent type (algorithm wrapper) + "vehicle_assignment": "HEURISTIC", # Default vehicle assignment agent type + "parking_allocation": "RULE_BASED", # Default parking allocation agent type + "grid_management": "RULE_BASED", # Default grid management agent type + "demand_forecasting": "RULE_BASED" # Default demand forecasting agent type + } + + self.agent_configuration_file = None # Path to agent-specific configuration + self.enable_decision_tracking = True # Enable comprehensive decision tracking + self.enable_performance_monitoring = True # Enable agent performance monitoring + + # Agent Algorithm Configuration 
(defaults) + # ====================================== + # Default algorithms for algorithm agents (when agent_type = "HEURISTIC") + self.default_algorithms = { + "charging": "first_come_first_served", # Default charging algorithm + "routing": "lowest_occupancy_first", # Default routing algorithm + "storage": "peak_shaving" # Default storage algorithm + } + + # Agent Performance Thresholds (defaults) + # ===================================== + self.agent_confidence_threshold = 0.7 # Minimum confidence for agent decisions + self.agent_timeout_seconds = 30.0 # Timeout for agent decisions + self.agent_fallback_enabled = True # Enable fallback to direct calls if agent fails + self.enable_hyperparameter_tuning = False + self.save_training_results = False # Don't save results during training by default + + # Agent Strategy Configuration (defaults) + self.default_strategies = { + "pricing": "time_of_use", + "charging": "first_come_first_served", + "storage": "peak_shaving", + "routing": "lowest_occupancy_first", + "vehicle_assignment": "lowest_occupancy_first", # Same as routing + "parking_allocation": "lowest_occupancy_first", # Same as routing + "grid_management": "load_balancing", + "demand_forecasting": "historical_average" + } + # from main file self.set_parameters_from_ini_file() @@ -188,6 +235,51 @@ def adjust_peak_penalty(self, peak_penalty): if peak_penalty == 'h': self.peak_cost = self.peak_cost * 3 + def get_agent_configuration(self, decision_type: str) -> dict: + """ + Get agent configuration for a specific decision type. + + Args: + decision_type: The type of decision (pricing, charging, storage, routing, etc.) + + Returns: + Dictionary containing agent configuration + """ + if not self.use_agent_decision_system: + return {} + + agent_type = self.default_agent_types.get(decision_type.lower(), "RULE_BASED") + algorithm = self.default_algorithms.get(decision_type.lower(), None) + + config = { + "agent_type": agent_type, + "enabled": True, + "confidence_threshold": self.agent_confidence_threshold, + "timeout_seconds": self.agent_timeout_seconds, + "fallback_enabled": self.agent_fallback_enabled + } + + if algorithm: + config["algorithm"] = algorithm + + return config + + def get_all_agent_configurations(self) -> dict: + """ + Get configuration for all agent types. 
+ + Returns: + Dictionary containing configurations for all decision types + """ + if not self.use_agent_decision_system: + return {} + + configs = {} + for decision_type in self.default_agent_types.keys(): + configs[decision_type] = self.get_agent_configuration(decision_type) + + return configs + def set_parameters_from_ini_file(self) -> None: @@ -201,6 +293,9 @@ def set_parameters_from_ini_file(self) -> None: self.CACHE_PATH_WS = parser_main.get("SETTINGS", "caching_path") self.OUTPUT_DATA_PATH = parser_main.get("SETTINGS", "raw_output_save_path") self.OUTPUT_VIZ_PATH = parser_main.get("SETTINGS", "visuals_save_path") + + # Logging configuration + self.log_level = parser_main.get("SETTINGS", "log_level", fallback="ERROR") # self.TRAIN_WEEKS, self.TEST_WEEKS = sample_training_and_test_weeks(seed=None) self.SIM_SEASON = parser_main.get("ENVIRONMENT", "sim_season").split(",") @@ -264,6 +359,66 @@ def set_parameters_from_ini_file(self) -> None: self.LOOKAHEAD = parser_main.getint("OPERATOR", "num_lookahead_planning_periods") self.LOOKBACK = 24 * 60 + # Agent Decision System Configuration + # =================================== + if parser_main.has_section("AGENT_DECISION_SYSTEM"): + self.use_agent_decision_system = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enabled", fallback=True) + self.enable_decision_tracking = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_decision_tracking", fallback=True) + self.enable_performance_monitoring = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_performance_monitoring", fallback=True) + self.agent_confidence_threshold = parser_main.getfloat("AGENT_DECISION_SYSTEM", "confidence_threshold", fallback=0.7) + self.agent_timeout_seconds = parser_main.getfloat("AGENT_DECISION_SYSTEM", "timeout_seconds", fallback=30.0) + self.agent_fallback_enabled = parser_main.getboolean("AGENT_DECISION_SYSTEM", "fallback_enabled", fallback=True) + self.enable_hyperparameter_tuning = parser_main.getboolean("AGENT_DECISION_SYSTEM", "enable_hyperparameter_tuning", fallback=False) + self.save_training_results = parser_main.getboolean("AGENT_DECISION_SYSTEM", "save_training_results", fallback=False) + + # Read agent types for each decision type + if parser_main.has_option("AGENT_DECISION_SYSTEM", "pricing_agent_type"): + self.default_agent_types["pricing"] = parser_main.get("AGENT_DECISION_SYSTEM", "pricing_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "charging_agent_type"): + self.default_agent_types["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "charging_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "storage_agent_type"): + self.default_agent_types["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "storage_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "routing_agent_type"): + self.default_agent_types["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "routing_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "vehicle_assignment_agent_type"): + self.default_agent_types["vehicle_assignment"] = parser_main.get("AGENT_DECISION_SYSTEM", "vehicle_assignment_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "parking_allocation_agent_type"): + self.default_agent_types["parking_allocation"] = parser_main.get("AGENT_DECISION_SYSTEM", "parking_allocation_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "grid_management_agent_type"): + self.default_agent_types["grid_management"] = parser_main.get("AGENT_DECISION_SYSTEM", 
"grid_management_agent_type") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "demand_forecasting_agent_type"): + self.default_agent_types["demand_forecasting"] = parser_main.get("AGENT_DECISION_SYSTEM", "demand_forecasting_agent_type") + + # Read strategy parameters for each decision type + if parser_main.has_option("AGENT_DECISION_SYSTEM", "pricing_strategy"): + self.default_strategies["pricing"] = parser_main.get("AGENT_DECISION_SYSTEM", "pricing_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "charging_strategy"): + self.default_strategies["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "charging_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "storage_strategy"): + self.default_strategies["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "storage_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "routing_strategy"): + self.default_strategies["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "routing_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "vehicle_assignment_strategy"): + self.default_strategies["vehicle_assignment"] = parser_main.get("AGENT_DECISION_SYSTEM", "vehicle_assignment_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "parking_allocation_strategy"): + self.default_strategies["parking_allocation"] = parser_main.get("AGENT_DECISION_SYSTEM", "parking_allocation_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "grid_management_strategy"): + self.default_strategies["grid_management"] = parser_main.get("AGENT_DECISION_SYSTEM", "grid_management_strategy") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "demand_forecasting_strategy"): + self.default_strategies["demand_forecasting"] = parser_main.get("AGENT_DECISION_SYSTEM", "demand_forecasting_strategy") + + # Read default algorithms for algorithm agents + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_charging_algorithm"): + self.default_algorithms["charging"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_charging_algorithm") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_routing_algorithm"): + self.default_algorithms["routing"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_routing_algorithm") + if parser_main.has_option("AGENT_DECISION_SYSTEM", "default_storage_algorithm"): + self.default_algorithms["storage"] = parser_main.get("AGENT_DECISION_SYSTEM", "default_storage_algorithm") + + # Read agent configuration file path if specified + if parser_main.has_option("AGENT_DECISION_SYSTEM", "agent_configuration_file"): + self.agent_configuration_file = parser_main.get("AGENT_DECISION_SYSTEM", "agent_configuration_file") + self.MAINTENANCE_COST = parser_main.getfloat("CAPEX", "maintenance_cost") self.ELECTRICITY_TARIFF = parser_main.get("OPEX", "hourly_energy_costs").split(",") self.ELECTRICITY_TARIFF = [int(x) / 100 for x in self.ELECTRICITY_TARIFF] diff --git a/resources/configuration/ini_files/Conda_Environment_Config.txt b/resources/configuration/ini_files/Conda_Environment_Config.txt deleted file mode 100644 index 7bab06a..0000000 --- a/resources/configuration/ini_files/Conda_Environment_Config.txt +++ /dev/null @@ -1,53 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2020.12.5-ha878542_0.tar.bz2 
-https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.33.1-h53a641e_7.conda -https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_19.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-9.1.0-hdf63c60_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-9.1.0-hdf63c60_0.conda -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_19.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/icu-67.1-he1b5a44_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.12-pthreads_hb3c22a3_1.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.2-he6710b0_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1k-h27cfd23_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.5-h7b6447c_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7b6447c_3.conda -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-8_openblas.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h21135ba_2.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.1-h27cfd23_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.10-hbc83047_0.conda -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.10.4-h7ca028e_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-8_openblas.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-8_openblas.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.35.4-hdfb4753_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/python-3.7.10-hdb3f193_0.conda -https://conda.anaconda.org/conda-forge/noarch/et_xmlfile-1.0.1-py_1001.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/jdcal-1.4.1-py_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.5-py37h95a1406_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-2.4.7-pyh9f0ad1d_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.7-1_cp37m.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/pytz-2021.1-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/simpy-4.0.1-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/noarch/wheel-0.36.2-pyhd3eb1b0_0.conda -https://conda.anaconda.org/conda-forge/noarch/xlrd-2.0.1-pyhd8ed1ab_3.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/certifi-2020.12.5-py37h89c1867_1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.10.0-py_2.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.3.1-py37hc928c03_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/openpyxl-3.0.7-pyhd8ed1ab_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.1-py_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.3-py37h8911b10_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.1-py37h4abf009_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/pandas-0.25.3-py37hb3f55d8_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.1-py_0.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-52.0.0-py37h06a4308_0.conda -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.2.2-py37h1d35a4c_1.tar.bz2 -https://repo.anaconda.com/pkgs/main/linux-64/pip-21.0.1-py37h06a4308_0.conda 
-https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.11.1-py37h8f50634_2.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.2.2-1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.1-pyhd8ed1ab_1.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.1-hd8ed1ab_1.tar.bz2 diff --git a/resources/configuration/ini_files/app-local.ini b/resources/configuration/ini_files/app-local.ini deleted file mode 100644 index a3e74d8..0000000 --- a/resources/configuration/ini_files/app-local.ini +++ /dev/null @@ -1,67 +0,0 @@ -# Basic Settings -[SETTINGS] -raw_input_path = /Volumes/karsten.schroer@uni-koeln.de/Uni/Research/04_CASE_Clustered_EV_Charging/ -raw_output_save_path = ./Utilities/raw_output/ -visuals_save_path= ./Utilities/visuals_output/ -log_level = ERROR - -# File to Environment Bounding Box -[ENVIRONMENT] -#name of scenario (appears as postfix in output file names) -post_fix= _testing -#date -sim_start_day = 2019-06-03 -#days -sim_duration = 1 -day_types = Workday,Saturday,Sunday - -# Charging/Parking Requests -[REQUESTS] -#which facility to sample parking requests from -facility = Facility_3 -#share of EVs in total population -ev_share = 0.25 -#region for undelying EV population -region = CA -# artificially limit daily requests to a specific number -max_daily_requests = False - -# Facility Attributes -[INFRASTRUCTURE] -parking_capa = 1000 -# number of chargers -num_charger = 1000 -#connectors per charger -num_connector = 1 -#all in KW -charger_power = 22 -grid_capa = 700 -min_facility_baseload = 75 -max_facility_baseload = 300 -installed_capa_PV = 200 - -# Operator Configurations -[OPERATOR] -# routing decisions -routing_algo = None -#charging decisions -charging_algo = equal_sharing -# how is a planning period defined (in sim time) -planning_period_length = 15 -# how much foresight in operations? -num_lookahead_planning_periods=1 - -# Investment Costs -[CAPEX] -charger_cost = 4000 -connector_cost = 250 -#USD/kW -grid_expansion_cost = 240 - -# Operational Costs -[OPEX] -#USD/kWh -hourly_energy_costs = 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 23, 23, 23, 23, 23, 23, 8, 8 -#USD/kW -monthly_peak_cost = 15.84 - diff --git a/resources/configuration/ini_files/app-remote_second_WS.ini b/resources/configuration/ini_files/app-remote.ini similarity index 58% rename from resources/configuration/ini_files/app-remote_second_WS.ini rename to resources/configuration/ini_files/app-remote.ini index b0cfd20..ca62c10 100644 --- a/resources/configuration/ini_files/app-remote_second_WS.ini +++ b/resources/configuration/ini_files/app-remote.ini @@ -48,8 +48,8 @@ num_connector = 1 #all in kW charger_power_fast = 50 charger_power_slow = 22 -num_transformer = 0 -grid_capa = 0 +num_transformer = 3 +grid_capa = 500 #1.2 maximum load min_facility_baseload = 75 ;75 @@ -62,22 +62,29 @@ installed_capa_PV = 600 installed_storage = 500 # Operator Configurations +# ======================= +# NOTE: These legacy algorithm configurations are now used as defaults for algorithm agents +# when the agent decision system is enabled. The actual algorithms are wrapped as agents +# and can be configured in the [AGENT_DECISION_SYSTEM] section above. 
[OPERATOR] #routing decisions # choose from: perfect_info, random, lowest_occupancy_first, lowest_utilization_first, fill_one_after_other, # matching_supply_demand, minimum_power_requirement + # These algorithms are now wrapped as AlgorithmRoutingAgent when agent_type = HEURISTIC routing_algo = minimum_power_requirement #charging decisions # choose from: perfect_info, uncontrolled, first_come_first_served, earliest_deadline_first, least_laxity_first, equal_sharing, # online_myopic, online_multi_period, integrated_storage, dynamic, dynamic_multi_agent, average_power + # These algorithms are now wrapped as AlgorithmChargingAgent when agent_type = HEURISTIC charging_algo = average_power #storage decision # choose from: temporal_arbitrage, peak_shaving +# These algorithms are now wrapped as AlgorithmStorageAgent when agent_type = HEURISTIC storage_algo = peak_shaving # how is a planning period defined (in sim time) scheduling_mode = discrete_time # at which intervals (in unit sim time) to do the routing/charging re-planning -planning_interval = 59 +planning_interval = 15 # how long a period is in the optimization (in unit sim time) optimization_period_length = 15 # how much foresight in operations (in optimization periods)?, only relevant for dynamic model! @@ -88,6 +95,67 @@ service_level=1 minimum_served_demand = 1 penalty_for_missed_kWh = 0.5 + +# Agent Decision System Configuration +# =================================== +# This section configures the new agent-based decision system where all decisions +# are made by agents (RL agents, rule-based agents, algorithm agents) rather than +# being hardcoded in business logic. +[AGENT_DECISION_SYSTEM] +# Enable/disable the agent decision system +enabled = True + +# Enable comprehensive decision tracking and monitoring +enable_decision_tracking = True +enable_performance_monitoring = True + +# Agent performance thresholds +confidence_threshold = 0.7 +timeout_seconds = 30.0 +fallback_enabled = True +# Enable hyperparameter tuning for learnable agents (grid search over learning_rate, batch_size, tau) +enable_hyperparameter_tuning = False +# Save training results during training episodes (can slow down training) +save_training_results = False + +# Agent types for each decision type +# Available options: RL_SAC, RL_DQN, RL_DDPG, RULE_BASED, HEURISTIC, OPTIMIZATION, ML_MODEL +pricing_agent_type = RULE_BASED +charging_agent_type = HEURISTIC +storage_agent_type = HEURISTIC +routing_agent_type = HEURISTIC +vehicle_assignment_agent_type = HEURISTIC +parking_allocation_agent_type = HEURISTIC +grid_management_agent_type = HEURISTIC +demand_forecasting_agent_type = HEURISTIC + +# Strategy parameters for each decision type +# Available strategies depend on the agent type and decision type +pricing_strategy = time_of_use +charging_strategy = first_come_first_served +storage_strategy = peak_shaving +routing_strategy = lowest_occupancy_first +vehicle_assignment_strategy = lowest_occupancy_first +parking_allocation_strategy = lowest_occupancy_first +grid_management_strategy = load_balancing +demand_forecasting_strategy = historical_average + +# Default algorithms for algorithm agents (when agent_type = HEURISTIC) +# Charging algorithms: uncontrolled, first_come_first_served, earliest_deadline_first, +# least_laxity_first, equal_sharing, online_myopic, online_multi_period, +# integrated_storage, perfect_info, perfect_info_with_storage +default_charging_algorithm = first_come_first_served + +# Routing algorithms: random, lowest_occupancy_first, fill_one_after_other, 
+# lowest_utilization_first, matching_supply_demand, minimum_power_requirement +default_routing_algorithm = lowest_occupancy_first + +# Storage algorithms: uncontrolled, temporal_arbitrage, peak_shaving +default_storage_algorithm = peak_shaving + +# Path to agent-specific configuration file (optional) +# agent_configuration_file = config/agents.json + # Investment Costs [CAPEX] maintenance_cost = 0.05 diff --git a/resources/logging/log.py b/resources/logging/log.py index 67fa89d..f42514c 100644 --- a/resources/logging/log.py +++ b/resources/logging/log.py @@ -3,7 +3,26 @@ from resources.logging.simulation_context_filter import SimulationContextFilter lg = logging.getLogger() -lg.setLevel(logging.ERROR) + +# Try to get log level from configuration, fallback to ERROR if not available +try: + from resources.configuration.configuration import Configuration + config = Configuration.instance() + log_level_str = getattr(config, 'log_level', 'ERROR') + + # Convert string to logging level + log_level_map = { + 'DEBUG': logging.DEBUG, + 'INFO': logging.INFO, + 'WARNING': logging.WARNING, + 'ERROR': logging.ERROR, + 'CRITICAL': logging.CRITICAL + } + log_level = log_level_map.get(log_level_str.upper(), logging.ERROR) +except: + log_level = logging.ERROR + +lg.setLevel(log_level) # Avoid adding duplicate handlers if this module is imported multiple times if not lg.handlers: @@ -11,11 +30,11 @@ formatter = logging.Formatter('%(asctime)s [%(levelname)s] [Time %(env_time)10s] [%(clazz)30s %(oid)3s]: %(message)s') file_handler = logging.FileHandler("report.log") file_handler.setFormatter(formatter) - file_handler.setLevel(logging.ERROR) + file_handler.setLevel(log_level) stream_handler = logging.StreamHandler() stream_handler.setFormatter(formatter) - stream_handler.setLevel(logging.ERROR) + stream_handler.setLevel(log_level) lg.addHandler(file_handler) lg.addHandler(stream_handler) diff --git a/run_simulation.py b/run_simulation.py index 6706b3c..f88ec8f 100644 --- a/run_simulation.py +++ b/run_simulation.py @@ -248,29 +248,33 @@ def run_single_simulation( plot_time = round((plot_end_time - save_end_time) / 60, 2) print("Results Plotted (in {} minutes)".format(plot_time)) if model.charging_agent: - model.charging_agent.save_models() + if hasattr(model.charging_agent, 'save_models'): + model.charging_agent.save_models() if model.pricing_agent: - model.pricing_agent.save_models() + if hasattr(model.pricing_agent, 'save_models'): + model.pricing_agent.save_models() # model.storage_agent.save_models() - if model.charging_agent: + if model.charging_agent and hasattr(model.charging_agent, 'environment'): lg.error( f"profit = {model.charging_agent.environment.total_reward['missed']}," f" energy = {model.charging_agent.environment.total_reward['energy']} ,feasibility " f"= {model.charging_agent.environment.total_reward['feasibility']}, feasibility_storage " f"= {model.charging_agent.environment.total_reward['feasibility_storage']}, pricing " - f"= {model.pricing_agent.environment.total_reward['missed']}" + f"= {model.pricing_agent.environment.total_reward['missed'] if hasattr(model.pricing_agent, 'environment') else 'N/A'}" ) - if model.pricing_agent: - lg.error(f"profit ={model.pricing_agent.environment.total_reward['missed']}") - if model.charging_agent: + if model.pricing_agent and hasattr(model.pricing_agent, 'environment'): + lg.error(f"profit ={model.pricing_agent.environment.total_reward['profit']}") + if model.charging_agent and hasattr(model.charging_agent, 'environment'): 
model.charging_agent.environment.total_reward["missed"] = 0 model.charging_agent.environment.total_reward["feasibility"] = 0 model.charging_agent.environment.total_reward["feasibility_storage"] = 0 model.charging_agent.environment.total_reward["energy"] = 0 - if model.pricing_agent: - model.pricing_agent.environment.total_reward["missed"] = 0 - model.pricing_agent._critic_loss = 0 - model.pricing_agent._policy_loss = 0 + if model.pricing_agent and hasattr(model.pricing_agent, 'environment'): + model.pricing_agent.environment.total_reward["profit"] = 0 + if hasattr(model.pricing_agent, '_critic_loss'): + model.pricing_agent._critic_loss = 0 + if hasattr(model.pricing_agent, '_policy_loss'): + model.pricing_agent._policy_loss = 0 # model.storage_agent.environment.total_reward['test'] = 0 output = pd.DataFrame( [ diff --git a/simulation/model.py b/simulation/model.py index 4bb9f11..a79419a 100644 --- a/simulation/model.py +++ b/simulation/model.py @@ -583,7 +583,7 @@ def _init_operations( self.costs = dict(investment=0, operations=0) self.objective_function = 0 self.total_energy_charged = 0 - self.reward = dict(costs=0, missed=0, feasibility=0, feasibility_storage=0) + self.reward = dict(costs=0, profit=0, feasibility=0, feasibility_storage=0) # Create operator self.operator = Operator( @@ -628,27 +628,54 @@ def _init_agents( # Setup charging agent if self.charging_agent: - self.charging_agent.environment.state = ( - self.charging_agent.environment.get_state(self, self.env) - ) - self.charging_agent.environment.env = self.env - self.charging_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if hasattr(self.charging_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.charging_agent.environment.charging_hub = self + self.charging_agent.environment.env = self.env + self.charging_agent.environment.state = ( + self.charging_agent.environment.get_state(self, self.env) + ) + self.charging_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.charging_agent, 'set_charging_hub'): + self.charging_agent.set_charging_hub(self) + print(f"Initialized charging agent: {self.charging_agent.__class__.__name__}") # Setup pricing agent if self.pricing_agent: - self.pricing_agent.environment.state = self.pricing_agent.environment.get_state( - self, self.env - ) - self.pricing_agent.environment.env = self.env - self.pricing_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if hasattr(self.pricing_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.pricing_agent.environment.charging_hub = self + self.pricing_agent.environment.env = self.env + self.pricing_agent.environment.state = self.pricing_agent.environment.get_state( + self, self.env + ) + self.pricing_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.pricing_agent, 'set_charging_hub'): + self.pricing_agent.set_charging_hub(self) + print(f"Initialized pricing agent: {self.pricing_agent.__class__.__name__}") # Setup storage agent if self.storage_agent: - self.storage_agent.environment.state = ( - self.storage_agent.environment.get_state(self, self.env) - ) - self.storage_agent.environment.env = self.env - self.storage_agent.reset_game() + # Check if agent has environment (RL agents) or not (rule-based/algorithm agents) + if 
hasattr(self.storage_agent, 'environment'): + # Set charging_hub and env in the environment for RL agents + self.storage_agent.environment.charging_hub = self + self.storage_agent.environment.env = self.env + self.storage_agent.environment.state = ( + self.storage_agent.environment.get_state(self, self.env) + ) + self.storage_agent.reset_game() + else: + # For rule-based/algorithm agents, just store the charging hub reference + if hasattr(self.storage_agent, 'set_charging_hub'): + self.storage_agent.set_charging_hub(self) + print(f"Initialized storage agent: {self.storage_agent.__class__.__name__}") # Link agents to operator self.operator.charging_agent = charging_agent diff --git a/simulation/operations/agent_decision_system.py b/simulation/operations/agent_decision_system.py new file mode 100644 index 0000000..c3e8ea5 --- /dev/null +++ b/simulation/operations/agent_decision_system.py @@ -0,0 +1,306 @@ +from typing import Any, Dict, List, Optional, Union, Type +import logging +from dataclasses import dataclass +from datetime import datetime +import uuid + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent, + ParkingAllocationAgent, + GridManagementAgent, + DemandForecastingAgent +) +from simulation.operations.decision_request_system import ( + DecisionRequestSystem, + DecisionRequest, + DecisionResponse, + RequestStatus, + decision_system +) + +logger = logging.getLogger(__name__) + + +@dataclass +class AgentDecision: + """Represents a decision made by an agent""" + decision_id: str + agent_type: AgentType + decision_type: DecisionType + context: Dict[str, Any] + action: Dict[str, Any] + confidence: float + timestamp: datetime + metadata: Dict[str, Any] + + +class AgentDecisionSystem: + """ + Centralized system for managing all agent-based decisions. + + This system ensures that ALL decisions in the EV charging operations + are made by agents (RL agents, rule-based agents, etc.) rather than + being hardcoded in the business logic. + + Key principles: + 1. No decisions in business logic - all decisions go through agents + 2. Standardized interface for all agents + 3. Comprehensive tracking and monitoring + 4. Fallback mechanisms for reliability + 5. Support for multiple agent types (RL, rule-based, ML, etc.) + """ + + def __init__(self): + self.agents: Dict[DecisionType, BaseAgent] = {} + self.decision_history: List[AgentDecision] = [] + self.agent_registry: Dict[str, Type[BaseAgent]] = {} + self.decision_callbacks: Dict[str, callable] = {} + + def register_agent(self, decision_type: DecisionType, agent: BaseAgent) -> None: + """ + Register an agent for a specific decision type. + + Args: + decision_type: The type of decision this agent can make + agent: The agent instance + """ + self.agents[decision_type] = agent + logger.info(f"Registered {agent.__class__.__name__} for {decision_type.value} decisions") + + def register_agent_class(self, name: str, agent_class: Type[BaseAgent]) -> None: + """ + Register an agent class for dynamic instantiation. 
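+        Registered classes can later be instantiated by name when agents are
+        configured dynamically. Illustrative only (the class name is hypothetical):
+        agent_decision_system.register_agent_class("heuristic_pricing", HeuristicPricingAgent)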
+ + Args: + name: Name identifier for the agent class + agent_class: The agent class to register + """ + self.agent_registry[name] = agent_class + logger.info(f"Registered agent class {name}: {agent_class.__name__}") + + def make_decision( + self, + decision_type: DecisionType, + context: Dict[str, Any], + vehicles: Optional[List[Any]] = None, + priority: int = 1, + timeout_seconds: float = 30.0 + ) -> AgentDecision: + """ + Make a decision using the appropriate agent. + + This is the main entry point for all decisions in the system. + Every decision request goes through this method. + + Args: + decision_type: Type of decision needed + context: Context information for the decision + vehicles: List of vehicles (for vehicle-related decisions) + priority: Decision priority (1-10, higher = more important) + timeout_seconds: Timeout for the decision + + Returns: + AgentDecision object containing the decision result + + Raises: + ValueError: If no agent is registered for the decision type + """ + if decision_type not in self.agents: + raise ValueError(f"No agent registered for decision type: {decision_type.value}") + + agent = self.agents[decision_type] + decision_id = str(uuid.uuid4()) + + # Create decision request for tracking + request_id = decision_system.create_request( + agent_type=decision_type, + state=agent.get_state(), + context=context, + priority=priority, + timeout_seconds=timeout_seconds, + metadata={ + "agent_type": agent.agent_type.value, + "decision_id": decision_id + } + ) + + try: + # Update agent state + agent.update_state(context) + + # Make decision based on agent type + if decision_type in [DecisionType.CHARGING, DecisionType.ROUTING, + DecisionType.VEHICLE_ASSIGNMENT, DecisionType.PARKING_ALLOCATION]: + # Vehicle-related decisions + if vehicles is None: + vehicles = [] + action_result = agent.select_action(vehicles, context) + else: + # Non-vehicle decisions + action_result = agent.select_action(context) + + # Process the request + response = decision_system.process_request(request_id) + + # Create decision record + decision = AgentDecision( + decision_id=decision_id, + agent_type=agent.agent_type, + decision_type=decision_type, + context=context, + action=action_result, + confidence=action_result.get("confidence", 0.5), + timestamp=datetime.now(), + metadata={ + "request_id": request_id, + "agent_class": agent.__class__.__name__, + "vehicles_count": len(vehicles) if vehicles else 0 + } + ) + + # Store decision in history + self.decision_history.append(decision) + + # Call any registered callbacks + if decision_type.value in self.decision_callbacks: + self.decision_callbacks[decision_type.value](decision) + + logger.info(f"Decision made: {decision_type.value} by {agent.__class__.__name__}") + return decision + + except Exception as e: + logger.error(f"Error making {decision_type.value} decision: {e}") + # Mark request as failed + if request_id in decision_system.requests: + decision_system.requests[request_id].status = RequestStatus.FAILED + decision_system.requests[request_id].metadata["error"] = str(e) + raise + + def make_pricing_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a pricing decision using the pricing agent.""" + return self.make_decision(DecisionType.PRICING, context) + + def make_charging_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a charging decision using the charging agent.""" + return self.make_decision(DecisionType.CHARGING, context, vehicles) + + def make_storage_decision(self, 
context: Dict[str, Any]) -> AgentDecision: + """Make a storage decision using the storage agent.""" + return self.make_decision(DecisionType.STORAGE, context) + + def make_routing_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a routing decision using the routing agent.""" + return self.make_decision(DecisionType.ROUTING, context, vehicles) + + def make_vehicle_assignment_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a vehicle assignment decision using the vehicle assignment agent.""" + return self.make_decision(DecisionType.VEHICLE_ASSIGNMENT, context, vehicles) + + def make_parking_allocation_decision(self, vehicles: List[Any], context: Dict[str, Any]) -> AgentDecision: + """Make a parking allocation decision using the parking allocation agent.""" + return self.make_decision(DecisionType.PARKING_ALLOCATION, context, vehicles) + + def make_grid_management_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a grid management decision using the grid management agent.""" + return self.make_decision(DecisionType.GRID_MANAGEMENT, context) + + def make_demand_forecasting_decision(self, context: Dict[str, Any]) -> AgentDecision: + """Make a demand forecasting decision using the demand forecasting agent.""" + return self.make_decision(DecisionType.DEMAND_FORECASTING, context) + + def register_decision_callback(self, decision_type: DecisionType, callback: callable) -> None: + """ + Register a callback to be called when a decision is made. + + Args: + decision_type: The decision type to monitor + callback: Function to call with the decision result + """ + self.decision_callbacks[decision_type.value] = callback + logger.info(f"Registered callback for {decision_type.value} decisions") + + def get_decision_history(self, decision_type: Optional[DecisionType] = None) -> List[AgentDecision]: + """ + Get decision history, optionally filtered by decision type. + + Args: + decision_type: Optional filter for specific decision type + + Returns: + List of decisions + """ + if decision_type: + return [d for d in self.decision_history if d.decision_type == decision_type] + return self.decision_history.copy() + + def get_agent_performance_stats(self) -> Dict[str, Any]: + """ + Get performance statistics for all agents. + + Returns: + Dictionary containing performance statistics + """ + stats = {} + + for decision_type, agent in self.agents.items(): + decisions = self.get_decision_history(decision_type) + + if decisions: + avg_confidence = sum(d.confidence for d in decisions) / len(decisions) + success_rate = len([d for d in decisions if d.confidence > 0.5]) / len(decisions) + + stats[decision_type.value] = { + "agent_type": agent.agent_type.value, + "agent_class": agent.__class__.__name__, + "total_decisions": len(decisions), + "average_confidence": avg_confidence, + "success_rate": success_rate, + "last_decision": decisions[-1].timestamp if decisions else None + } + else: + stats[decision_type.value] = { + "agent_type": agent.agent_type.value, + "agent_class": agent.__class__.__name__, + "total_decisions": 0, + "average_confidence": 0.0, + "success_rate": 0.0, + "last_decision": None + } + + return stats + + def reset_agents(self) -> None: + """Reset all registered agents.""" + for agent in self.agents.values(): + agent.reset() + logger.info("All agents reset") + + def cleanup_old_decisions(self, max_age_hours: float = 24.0) -> None: + """ + Clean up old decisions from history. 
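+        Intended for long-running simulations where the in-memory history would
+        otherwise grow without bound. Illustrative call:
+        agent_decision_system.cleanup_old_decisions(max_age_hours=6.0)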
+ + Args: + max_age_hours: Maximum age of decisions to keep + """ + cutoff_time = datetime.now().timestamp() - (max_age_hours * 3600) + + original_count = len(self.decision_history) + self.decision_history = [ + d for d in self.decision_history + if d.timestamp.timestamp() > cutoff_time + ] + + removed_count = original_count - len(self.decision_history) + if removed_count > 0: + logger.info(f"Cleaned up {removed_count} old decisions") + + +# Global instance for easy access +agent_decision_system = AgentDecisionSystem() diff --git a/simulation/operations/agents_controller.py b/simulation/operations/agents_controller.py new file mode 100644 index 0000000..bea4182 --- /dev/null +++ b/simulation/operations/agents_controller.py @@ -0,0 +1,43 @@ +from typing import Optional, Dict, Any, List + +from utilities.rl_agents.interfaces import PricingAgent, ChargingAgent, StorageAgent + + +class AgentsController: + def __init__(self, + pricing: Optional[PricingAgent] = None, + charging: Optional[ChargingAgent] = None, + storage: Optional[StorageAgent] = None): + self.pricing = pricing + self.charging = charging + self.storage = storage + + def reset_all(self) -> None: + for agent in (self.pricing, self.charging, self.storage): + if agent: + agent.reset() + + def pricing_step(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.pricing: + return None + self.pricing.update_state(context) + return self.pricing.select_action(context) + + def charging_step(self, vehicles: List[Any], context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.charging: + return None + self.charging.update_state(context) + return self.charging.select_action(vehicles, context) + + def storage_step(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]: + if not self.storage: + return None + self.storage.update_state(context) + return self.storage.select_action(context) + + def learn_all(self, transition: Dict[str, Any]) -> None: + for agent in (self.pricing, self.charging, self.storage): + if agent: + agent.learn(transition) + + diff --git a/simulation/operations/charging_service.py b/simulation/operations/charging_service.py new file mode 100644 index 0000000..c2a1f26 --- /dev/null +++ b/simulation/operations/charging_service.py @@ -0,0 +1,198 @@ +from typing import List, Dict, Any, Optional +from simulation.operations.agents_controller import AgentsController +from simulation.config_facade import ConfigFacade +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + + +class ChargingService: + """ + Service class for managing charging-related RL agent operations. + + Encapsulates all charging agent logic that was previously in the Operator class, + providing a clean separation of concerns and standardized interface for RL agents. + """ + + def __init__(self, operator_instance: Any, agents_controller: Optional[AgentsController] = None, config_facade: Optional[ConfigFacade] = None): + """ + Initialize the ChargingService. 
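+        The service keeps a back-reference to the operator (self.op) and reads
+        configuration through the ConfigFacade, so existing Operator call sites
+        continue to work unchanged.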
+ + Args: + operator_instance: Reference to the main operator instance + agents_controller: Controller for managing RL agents + config_facade: Facade for accessing configuration values + """ + self.op = operator_instance + self.agents_controller = agents_controller + self.config = config_facade or ConfigFacade() + + # Register agents with the decision request system + auto_register_agents(operator_instance) + + def take_learning_charging_actions(self, charging_strategy: str) -> None: + """ + Execute learning-based charging actions using RL agents. + + Args: + charging_strategy: The charging strategy to use + """ + if charging_strategy == "dynamic": + self.op.update_vehicles_status() + self.take_charging_action() + self.conduct_charging_action() + + if self.op.storage_agent: + self.op.get_exp_free_grid_capacity() + # Storage actions are handled by StorageService + if hasattr(self.op, 'storage_service'): + self.op.storage_service.take_storage_action() + self.op.storage_service.conduct_storage_action() + + def update_learning_charging_agent(self, charging_strategy: str) -> None: + """ + Update the learning charging agent. + + Args: + charging_strategy: The charging strategy to use + """ + if charging_strategy == "dynamic": + self.update_charging_agent() + + def take_charging_action(self) -> None: + """ + Take charging action using the RL charging agent. + """ + if self.agents_controller and self.agents_controller.charging: + # Use the controller to get charging action + context = { + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + action_result = self.agents_controller.charging_step( + vehicles=self.op.requests, + context=context + ) + if action_result: + self.op.charging_agent.action = action_result.get("charging_action") + else: + # Use decision request system for charging decisions + action = self._get_charging_decision_via_request() + self.op.charging_agent.action = action + + def conduct_charging_action(self) -> None: + """ + Execute the charging action by applying it to vehicles and chargers. + """ + action = self.op.charging_agent.action + action_index = 1 # Start from 1 because action[0] is reserved (possibly for pricing or metadata) + + for charger in self.op.charging_hub.chargers: + for connector_idx in range(charger.number_of_connectors): + if action_index >= len(action): + break # Prevent index error if action list is shorter than expected + + charging_power = action[action_index] + if charging_power > 0: + charging_vehicles = charger.charging_vehicles + if connector_idx < len(charging_vehicles): + vehicle = charging_vehicles[connector_idx] + vehicle.charging_power = charging_power + action_index += 1 + + self.op.check_charging_power() + self.op.charging_hub.grid.reset_reward() + + def update_charging_agent(self) -> None: + """ + Update the charging agent with new state and experience. 
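+        Conducts the stored action, runs actor-critic learning updates once enough
+        steps have been collected (skipped during evaluation episodes), and saves
+        the resulting transition via save_experience.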
+ """ + self.op.update_vehicles_status() + self.op.charging_hub.reward["missed"] = self.op.reward_computing() + + eval_ep = self.op.charging_agent.do_evaluation_iterations + self.op.charging_agent.conduct_action(self.op.charging_agent.action) + if self.op.charging_agent.time_for_critic_and_actor_to_learn(): + if not eval_ep: + for _ in range( + self.op.charging_agent.hyperparameters[ + "learning_updates_per_learning_session" + ] + ): + self.op.charging_agent.learn() + mask = ( + False + if self.op.charging_agent.episode_step_number_val + >= self.op.charging_agent.environment.MAX_EPISODE_STEPS + else self.op.charging_agent.done + ) + # if not eval_ep: + action = self.op.charging_agent.descale_action(self.op.charging_agent.action) + self.op.charging_agent.save_experience( + experience=( + self.op.charging_agent.state, + action, + self.op.charging_agent.reward, + self.op.charging_agent.next_state, + mask, + ) + ) + self.op.charging_agent.global_step_number += 1 + self.op.charging_agent.step_counter += 1 + + def _get_charging_decision_via_request(self) -> Any: + """ + Get charging decision through the decision request system. + + Returns: + The charging action/decision + """ + # Get current state from environment + state = self.op.charging_hub.charging_agent.environment.get_state( + self.op.charging_hub, self.op.env + ) + self.op.charging_agent.state = state + + eval_ep = self.op.charging_agent.do_evaluation_iterations + self.op.charging_agent.episode_step_number_val = 0 + + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "charging_hub": self.op.charging_hub, + "env": self.op.env, + "vehicles": self.op.requests + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.CHARGING, + state=self.op.charging_agent.state, + context=context, + metadata={ + "agent_name": getattr(self.op.charging_agent, "agent_name", "Unknown") + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + # Rescale action if needed + if hasattr(self.op.charging_agent, "rescale_action"): + return self.op.charging_agent.rescale_action(response.action) + else: + return response.action + else: + # Fallback to direct agent call if request system fails + # Handle different pick_action signatures + import inspect + sig = inspect.signature(self.op.charging_agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = self.op.charging_agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = self.op.charging_agent.pick_action() + + if hasattr(self.op.charging_agent, "rescale_action"): + return self.op.charging_agent.rescale_action(action) + else: + return action diff --git a/simulation/operations/decision_decorators.py b/simulation/operations/decision_decorators.py new file mode 100644 index 0000000..a457f11 --- /dev/null +++ b/simulation/operations/decision_decorators.py @@ -0,0 +1,212 @@ +from typing import Any, Dict, Optional, Callable +from functools import wraps +import logging +from .decision_request_system import ( + DecisionRequestSystem, + DecisionType, + decision_system +) + +logger = logging.getLogger(__name__) + + +def require_decision_request(decision_type: DecisionType, timeout_seconds: float = 30.0): + """ + Decorator that automatically creates a decision request when an RL agent method is called. 
+ + This decorator can be applied to methods like pick_action() to ensure that every + decision is tracked through the request system. + + Args: + decision_type: The type of decision being made + timeout_seconds: Timeout for the request + + Example: + @require_decision_request(DecisionType.PRICING) + def pick_action(self, eval_ep=False): + # Original pick_action implementation + pass + """ + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(self, *args, **kwargs): + # Create context from method arguments + context = { + "method_name": func.__name__, + "args": args, + "kwargs": kwargs, + "eval_ep": kwargs.get("eval_ep", False) + } + + # Get current state from the agent + state = getattr(self, "state", None) + + # Create decision request + request_id = decision_system.create_request( + agent_type=decision_type, + state=state, + context=context, + timeout_seconds=timeout_seconds, + metadata={ + "agent_class": self.__class__.__name__, + "method": func.__name__ + } + ) + + logger.info(f"Created decision request {request_id} for {decision_type.value}") + + try: + # Process the request immediately + response = decision_system.process_request(request_id) + + if response: + logger.info(f"Decision request {request_id} completed successfully") + return response.action + else: + logger.warning(f"Decision request {request_id} failed, falling back to direct call") + # Fallback to original method + return func(self, *args, **kwargs) + + except Exception as e: + logger.error(f"Error processing decision request {request_id}: {e}") + # Fallback to original method + return func(self, *args, **kwargs) + + return wrapper + return decorator + + +def track_decision(decision_type: DecisionType): + """ + Decorator that tracks decisions without requiring the request system. + + This is a lighter-weight decorator that just logs decisions without + going through the full request system. + + Args: + decision_type: The type of decision being made + """ + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(self, *args, **kwargs): + # Log the decision attempt + logger.info(f"Making {decision_type.value} decision via {func.__name__}") + + # Call the original method + result = func(self, *args, **kwargs) + + # Log the decision result + logger.info(f"Completed {decision_type.value} decision: {result}") + + return result + return wrapper + return decorator + + +class DecisionRequestMixin: + """ + Mixin class that provides decision request functionality to RL agents. + + This mixin can be added to RL agent classes to provide standardized + decision request capabilities. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._decision_type = None + self._last_request_id = None + + def set_decision_type(self, decision_type: DecisionType) -> None: + """Set the decision type for this agent""" + self._decision_type = decision_type + + def make_decision_request( + self, + state: Any, + context: Optional[Dict[str, Any]] = None, + priority: int = 1, + timeout_seconds: float = 30.0 + ) -> Optional[Any]: + """ + Make a decision request through the decision system. 
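+        Requires set_decision_type() to have been called first; a ValueError is
+        raised otherwise. Returns None when the request cannot be processed.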
+ + Args: + state: Current state for the agent + context: Additional context + priority: Request priority + timeout_seconds: Request timeout + + Returns: + The decision/action if successful, None if failed + """ + if not self._decision_type: + raise ValueError("Decision type not set for this agent") + + if context is None: + context = {} + + # Create request + request_id = decision_system.create_request( + agent_type=self._decision_type, + state=state, + context=context, + priority=priority, + timeout_seconds=timeout_seconds, + metadata={ + "agent_class": self.__class__.__name__, + "agent_id": id(self) + } + ) + + self._last_request_id = request_id + + # Process request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + return None + + def get_last_request_status(self) -> Optional[str]: + """Get the status of the last request made by this agent""" + if self._last_request_id: + status = decision_system.get_request_status(self._last_request_id) + return status.value if status else None + return None + + +def register_agent_with_system(agent: Any, decision_type: DecisionType) -> None: + """ + Register an agent with the decision request system. + + Args: + agent: The RL agent to register + decision_type: The type of decisions this agent can make + """ + decision_system.register_agent_handler(decision_type, agent) + logger.info(f"Registered {agent.__class__.__name__} for {decision_type.value} decisions") + + +def auto_register_agents(operator_instance: Any) -> None: + """ + Automatically register all agents from an operator instance with the decision system. + + Args: + operator_instance: The operator instance containing agents + """ + from .decision_request_system import DecisionType + + # Register pricing agent + if hasattr(operator_instance, "pricing_agent") and operator_instance.pricing_agent: + register_agent_with_system(operator_instance.pricing_agent, DecisionType.PRICING) + + # Register charging agent + if hasattr(operator_instance, "charging_agent") and operator_instance.charging_agent: + register_agent_with_system(operator_instance.charging_agent, DecisionType.CHARGING) + + # Register storage agent + if hasattr(operator_instance, "storage_agent") and operator_instance.storage_agent: + register_agent_with_system(operator_instance.storage_agent, DecisionType.STORAGE) + + logger.info("Auto-registered agents with decision request system") diff --git a/simulation/operations/decision_request_system.py b/simulation/operations/decision_request_system.py new file mode 100644 index 0000000..03ff17f --- /dev/null +++ b/simulation/operations/decision_request_system.py @@ -0,0 +1,344 @@ +from typing import Any, Dict, List, Optional, Union +from dataclasses import dataclass +from enum import Enum +import time +import uuid +from datetime import datetime + + +class DecisionType(Enum): + """Types of decisions that RL agents can make""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + ROUTING = "routing" + + +class RequestStatus(Enum): + """Status of a decision request""" + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + TIMEOUT = "timeout" + + +@dataclass +class DecisionRequest: + """Represents a decision request for an RL agent""" + request_id: str + agent_type: DecisionType + state: Any + context: Dict[str, Any] + timestamp: datetime + status: RequestStatus + priority: int = 1 + timeout_seconds: float = 30.0 + metadata: Optional[Dict[str, Any]] = None + + def 
__post_init__(self): + if self.metadata is None: + self.metadata = {} + + +@dataclass +class DecisionResponse: + """Represents a response to a decision request""" + request_id: str + action: Any + confidence: Optional[float] = None + reasoning: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None + + def __post_init__(self): + if self.metadata is None: + self.metadata = {} + + +class DecisionRequestSystem: + """ + Centralized system for managing decision requests from RL agents. + + This system provides a standardized way for RL agents to request decisions, + track request status, and handle responses. It supports: + - Request queuing and prioritization + - Timeout handling + - Request tracking and logging + - Integration with existing RL agent infrastructure + """ + + def __init__(self): + self.requests: Dict[str, DecisionRequest] = {} + self.responses: Dict[str, DecisionResponse] = {} + self.request_history: List[DecisionRequest] = [] + self.agent_handlers: Dict[DecisionType, Any] = {} + self.request_callbacks: Dict[str, callable] = {} + + def register_agent_handler(self, decision_type: DecisionType, handler: Any) -> None: + """ + Register an agent handler for a specific decision type. + + Args: + decision_type: The type of decision this handler can process + handler: The agent object that can make decisions + """ + self.agent_handlers[decision_type] = handler + + def create_request( + self, + agent_type: DecisionType, + state: Any, + context: Dict[str, Any], + priority: int = 1, + timeout_seconds: float = 30.0, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Create a new decision request. + + Args: + agent_type: Type of decision needed + state: Current state for the agent + context: Additional context information + priority: Request priority (higher = more important) + timeout_seconds: Timeout for the request + metadata: Additional metadata + + Returns: + Request ID for tracking + """ + request_id = str(uuid.uuid4()) + + request = DecisionRequest( + request_id=request_id, + agent_type=agent_type, + state=state, + context=context, + timestamp=datetime.now(), + status=RequestStatus.PENDING, + priority=priority, + timeout_seconds=timeout_seconds, + metadata=metadata or {} + ) + + self.requests[request_id] = request + return request_id + + def process_request(self, request_id: str) -> Optional[DecisionResponse]: + """ + Process a decision request using the appropriate agent. 
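+        Marks the request FAILED when no handler is registered or processing raises
+        an exception, and TIMEOUT when it has exceeded its timeout_seconds.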
+ + Args: + request_id: ID of the request to process + + Returns: + Decision response if successful, None if failed + """ + if request_id not in self.requests: + raise ValueError(f"Request {request_id} not found") + + request = self.requests[request_id] + + # Check if agent handler exists + if request.agent_type not in self.agent_handlers: + request.status = RequestStatus.FAILED + request.metadata["error"] = f"No handler registered for {request.agent_type}" + return None + + # Check timeout + if self._is_request_timed_out(request): + request.status = RequestStatus.TIMEOUT + return None + + # Update status + request.status = RequestStatus.PROCESSING + + try: + # Get the appropriate agent handler + agent = self.agent_handlers[request.agent_type] + + # Process the request based on agent type + response = self._process_with_agent(agent, request) + + if response: + request.status = RequestStatus.COMPLETED + self.responses[request_id] = response + else: + request.status = RequestStatus.FAILED + + return response + + except Exception as e: + request.status = RequestStatus.FAILED + request.metadata["error"] = str(e) + return None + + def _process_with_agent(self, agent: Any, request: DecisionRequest) -> Optional[DecisionResponse]: + """ + Process request with the specific agent type. + + Args: + agent: The agent to use for decision making + request: The decision request + + Returns: + Decision response + """ + try: + if request.agent_type == DecisionType.PRICING: + return self._process_pricing_request(agent, request) + elif request.agent_type == DecisionType.CHARGING: + return self._process_charging_request(agent, request) + elif request.agent_type == DecisionType.STORAGE: + return self._process_storage_request(agent, request) + elif request.agent_type == DecisionType.ROUTING: + return self._process_routing_request(agent, request) + else: + raise ValueError(f"Unknown decision type: {request.agent_type}") + + except Exception as e: + request.metadata["processing_error"] = str(e) + return None + + def _process_pricing_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process pricing decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action method") + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "pricing", "eval_ep": eval_ep} + ) + + def _process_charging_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process charging decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action 
method") + + # Rescale action if needed + if hasattr(agent, "rescale_action"): + action = agent.rescale_action(action) + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "charging", "eval_ep": eval_ep} + ) + + def _process_storage_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process storage decision request""" + # Set agent state + agent.state = request.state + + # Get evaluation flag from context + eval_ep = request.context.get("eval_ep", False) + charging_hub = request.context.get("charging_hub", None) + + # Get action from agent + if hasattr(agent, "pick_action"): + # Handle different pick_action signatures + import inspect + sig = inspect.signature(agent.pick_action) + if len(sig.parameters) > 2: # Method expects eval_ep and charging_hub parameters + action = agent.pick_action(eval_ep, charging_hub) + elif len(sig.parameters) > 1: # Method expects eval_ep parameter + action = agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + action = agent.pick_action() + else: + raise ValueError("Agent does not have pick_action method") + + return DecisionResponse( + request_id=request.request_id, + action=action, + metadata={"agent_type": "storage", "eval_ep": eval_ep} + ) + + def _process_routing_request(self, agent: Any, request: DecisionRequest) -> DecisionResponse: + """Process routing decision request""" + # For routing, we might need different logic + # This is a placeholder for future implementation + raise NotImplementedError("Routing decisions not yet implemented") + + def get_response(self, request_id: str) -> Optional[DecisionResponse]: + """Get the response for a request""" + return self.responses.get(request_id) + + def get_request_status(self, request_id: str) -> Optional[RequestStatus]: + """Get the status of a request""" + if request_id in self.requests: + return self.requests[request_id].status + return None + + def _is_request_timed_out(self, request: DecisionRequest) -> bool: + """Check if a request has timed out""" + elapsed = (datetime.now() - request.timestamp).total_seconds() + return elapsed > request.timeout_seconds + + def cleanup_old_requests(self, max_age_hours: float = 24.0) -> None: + """Clean up old requests and responses""" + current_time = datetime.now() + cutoff_time = current_time.timestamp() - (max_age_hours * 3600) + + # Move old requests to history + old_requests = [ + req for req in self.requests.values() + if req.timestamp.timestamp() < cutoff_time + ] + + for req in old_requests: + self.request_history.append(req) + del self.requests[req.request_id] + if req.request_id in self.responses: + del self.responses[req.request_id] + + def get_statistics(self) -> Dict[str, Any]: + """Get statistics about the request system""" + total_requests = len(self.requests) + len(self.request_history) + completed = len([r for r in self.requests.values() if r.status == RequestStatus.COMPLETED]) + failed = len([r for r in self.requests.values() if r.status == RequestStatus.FAILED]) + pending = len([r for r in self.requests.values() if r.status == RequestStatus.PENDING]) + + return { + "total_requests": total_requests, + "active_requests": len(self.requests), + "completed": completed, + "failed": failed, + "pending": pending, + "success_rate": completed / total_requests if total_requests > 0 else 0.0 + } + + +# Global instance for easy access +decision_system = DecisionRequestSystem() diff --git a/simulation/operations/operator.py b/simulation/operations/operator.py 
index a92c65c..fd18a13 100644 --- a/simulation/operations/operator.py +++ b/simulation/operations/operator.py @@ -12,6 +12,9 @@ from simulation.operations.NonLinearAlgorithms import nonlinear_pricing from simulation.operations.Operator_utils import compute_free_grid_capacity +from simulation.operations.pricing_service import PricingService +from simulation.operations.charging_service import ChargingService +from simulation.operations.storage_service import StorageService from utilities.rl_environments.rl_pricing_env import convert_to_vector @@ -79,6 +82,7 @@ def __init__( service_level: float, charging_hub: Any, minimum_served_demand: float, + agents_controller: Optional[Any] = None, ): """ Initialize the Operator with simulation parameters and strategies. @@ -129,6 +133,12 @@ def __init__( ) self._init_agents_and_events() self._init_capacity_tracking() + # Optional RL agents controller (pricing/charging/storage) + self.agents_controller = agents_controller + # Service composition for RL agents + self.pricing_service = PricingService(operator=self, agents_controller=self.agents_controller) + self.charging_service = ChargingService(operator_instance=self, agents_controller=self.agents_controller) + self.storage_service = StorageService(operator_instance=self, agents_controller=self.agents_controller) # Initialize based on configuration self._initialize_strategy_dependent_behavior() @@ -545,92 +555,39 @@ def _set_storage_discharging(self, power: float) -> None: def take_dynamic_pricing_actions(self) -> None: """Execute dynamic pricing actions and update price history.""" - self.get_exp_free_grid_capacity() - self.update_vehicles_status() - self.take_pricing_action() - self._update_dynamic_price_history() + self.pricing_service.take_dynamic_pricing_actions() def take_static_pricing_action(self) -> None: """Execute static pricing actions and update price history.""" - self.get_exp_free_grid_capacity() - self.update_vehicles_status() - self._update_pricing_parameters() - self._update_static_price_history() + self.pricing_service.take_static_pricing_action() def _update_dynamic_price_history(self) -> None: - """Update price history for dynamic pricing modes.""" - if self.pricing_mode == "Discrete": - self._add_discrete_price_to_history() - elif self.pricing_mode == "Continuous": - self._add_continuous_price_to_history() + # Backward-compat shim; delegate to service + self.pricing_service._update_dynamic_price_history() def _update_static_price_history(self) -> None: - """Update price history for static pricing modes.""" - if self.pricing_mode == "Discrete": - self._add_discrete_price_to_history() - elif self.pricing_mode in ["Continuous", "ToU"]: - self._add_continuous_price_to_history() + self.pricing_service._update_static_price_history() def _update_pricing_parameters(self) -> None: - """Update pricing parameters based on current mode and time.""" - if self.pricing_mode == "ToU": - self._update_tou_pricing() - elif self.pricing_mode == "perfect_info": - self._update_perfect_info_pricing() + self.pricing_service._update_pricing_parameters() def _update_tou_pricing(self) -> None: - """Update Time-of-Use pricing parameters.""" - hour = self._get_current_hour() - max_price = Configuration.instance().max_price_ToU - self.pricing_parameters[0] = ( - self.electricity_tariff[hour] / max(self.electricity_tariff) * max_price - ) + self.pricing_service._update_tou_pricing() def _update_perfect_info_pricing(self) -> None: - """Update perfect information pricing parameters.""" - hour = 
self._get_current_hour() - config = Configuration.instance() - - if config.dynamic_fix_term_pricing: - self.pricing_parameters[1] = self.price_schedules[1][hour] - self.pricing_parameters[0] = self.price_schedules[0][hour] - else: - self.pricing_parameters[1] = self.price_schedules[hour] + self.pricing_service._update_perfect_info_pricing() def _get_current_hour(self) -> int: - """Get current hour of the simulation.""" - return int((self.env.now % 1440) / 60) + return self.pricing_service._get_current_hour() def _add_discrete_price_to_history(self) -> None: - """Add discrete pricing data to price history.""" - self.price_history = pd.concat([ - self.price_history, - pd.DataFrame(self.price_pairs[:, 1]).transpose(), - ]) + self.pricing_service._add_discrete_price_to_history() def _add_continuous_price_to_history(self) -> None: - """Add continuous pricing data to price history.""" - self.price_history = pd.concat([ - self.price_history, - pd.DataFrame([ - self.pricing_parameters[0], - self.pricing_parameters[1] - ]).transpose(), - ]) + self.pricing_service._add_continuous_price_to_history() def get_current_pricing_data(self) -> PricingData: - """ - Get current pricing information as structured data. - - Returns: - PricingData: Current pricing information - """ - return PricingData( - energy_price=self.pricing_parameters[0] if len(self.pricing_parameters) > 0 else 0.0, - parking_price=self.parking_fee, - pricing_mode=self.pricing_mode, - price_history=self.price_history - ) + return self.pricing_service.get_current_pricing_data() # ============================================================================ # CHARGING ACTION METHODS @@ -793,22 +750,15 @@ def _update_peak_threshold(self) -> None: self.peak_threshold = current_peak def take_learning_charging_actions(self, charging_strategy): if charging_strategy == "dynamic": - self.update_vehicles_status() - self.take_charging_action() - self.conduct_charging_action() - - if self.storage_agent: - self.get_exp_free_grid_capacity() - self.take_storage_action() - self.conduct_storage_action() + self.charging_service.take_learning_charging_actions(charging_strategy) def update_learning_charging_and_pricing_agents(self, charging_strategy): if charging_strategy == "dynamic": - self.update_charging_agent() + self.charging_service.update_learning_charging_agent(charging_strategy) if self.storage_agent: - self.update_storage_agent() + self.storage_service.update_storage_agent() if self.charging_hub.dynamic_pricing: - self.update_pricing_agent() + self.pricing_service.update_pricing_agent() def get_charging_schedules_and_prices(self, charging_strategy, mode): """ @@ -849,93 +799,11 @@ def get_charging_schedules_and_prices(self, charging_strategy, mode): self.update_learning_charging_and_pricing_agents(charging_strategy) - def take_charging_action(self): - state = self.charging_hub.charging_agent.environment.get_state( - self.charging_hub, self.env - ) - self.charging_agent.state = state - - eval_ep = self.charging_agent.do_evaluation_iterations - self.charging_agent.episode_step_number_val = 0 - # while not self.done: - action = self.charging_agent.pick_action(eval_ep, self.charging_hub) - self.charging_agent.action = self.charging_agent.rescale_action(action) - - def take_pricing_action(self): - # Get current state from environment - pricing_state = self.pricing_agent.environment.get_state(self.charging_hub, self.env) - self.pricing_agent.state = pricing_state - eval_ep = self.pricing_agent.do_evaluation_iterations - - pricing_mode = 
Configuration.instance().pricing_mode - agent_name = self.pricing_agent.agent_name - - if pricing_mode == "Discrete": - if agent_name == "DQN": - self.pricing_agent.action = self.pricing_agent.pick_action() - if len(self.price_pairs[:, 1]) > 1: - vector_prices = convert_to_vector(self.pricing_agent.action) - else: - vector_prices = [self.pricing_agent.action] - final_pricing = self.pricing_agent.environment.get_final_prices_DQN(vector_prices) - for i, price in enumerate(final_pricing): - self.price_pairs[i, 1] = price - - elif agent_name == "SAC": - self.pricing_agent.action = self.pricing_agent.pick_action(eval_ep, self.charging_hub) - rescaled_actions = self.pricing_agent.environment.rescale_action(self.pricing_agent.action) - number_of_power_options = len(self.price_pairs[:, 1]) - final_pricing = rescaled_actions[:number_of_power_options] - self.price_pairs[0, 1] = final_pricing[0] - self.price_pairs[1, 1] = min(final_pricing[1], 1.5) - - # Optional: handle grid capacity and storage - # if Configuration.instance().limiting_grid_capa: - # self.grid_capa = rescaled_actions[number_of_power_options] - # if len(rescaled_actions) >= number_of_power_options + 2: - # self.storage_agent.action = [rescaled_actions[number_of_power_options + 1]] - # self.conduct_storage_action() - - elif pricing_mode == "Continuous": - self.pricing_agent.action = self.pricing_agent.pick_action(eval_ep, self.charging_hub) - rescaled_actions = self.pricing_agent.environment.rescale_action(self.pricing_agent.action) - - config = Configuration.instance() - if not config.dynamic_fix_term_pricing and config.capacity_pricing: - self.pricing_parameters[1] = rescaled_actions[0] - - elif config.dynamic_fix_term_pricing and not config.capacity_pricing: - self.pricing_parameters[0] = rescaled_actions[0] - if config.dynamic_parking_fee: - self.parking_fee = rescaled_actions[1] - - elif config.dynamic_fix_term_pricing and config.capacity_pricing: - self.pricing_parameters[0] = rescaled_actions[0] - self.pricing_parameters[1] = rescaled_actions[1] - - if config.limiting_grid_capa: - self.grid_capa = rescaled_actions[1] - - if config.dynamic_storage_scheduling: - self.storage_agent.action = [rescaled_actions[1]] - - self.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) - - # Reset reward at the end - self.charging_hub.grid.reset_reward() #TODO: it does not belong to the grid object - - def take_storage_action(self): - storage_state = self.charging_hub.storage_agent.environment.get_state( - self.charging_hub, self.env - ) - self.storage_agent.state = storage_state - eval_ep = self.storage_agent.do_evaluation_iterations - self.storage_agent.episode_step_number_val = 0 - # while not self.done: - self.storage_agent.action = self.storage_agent.pick_action( - eval_ep, self.charging_hub - ) + + + + def get_battery_max_min(self): bound_1 = ( @@ -1072,160 +940,17 @@ def check_storage(self, given_storage_action=None): # Track feasibility deviation self.charging_hub.reward["feasibility_storage"] += abs(raw_storage_power - storage_power) - def conduct_storage_action(self, given_storage_action=None): - if given_storage_action: - storage_power = given_storage_action[0] - else: - storage_power = self.storage_agent.action[0] - if storage_power >= 0: - self.charging_hub.electric_storage.charge_yn = 1 - self.charging_hub.electric_storage.charging_power = storage_power - self.charging_hub.electric_storage.discharge_yn = 0 - self.charging_hub.electric_storage.discharging_power = 0 - elif storage_power < 0: - 
self.charging_hub.electric_storage.charge_yn = 0 - self.charging_hub.electric_storage.charging_power = 0 - self.charging_hub.electric_storage.discharge_yn = 1 - self.charging_hub.electric_storage.discharging_power = -storage_power - self.check_storage(given_storage_action=given_storage_action) - - def conduct_charging_action(self): - action = self.charging_agent.action - action_index = 1 # Start from 1 because action[0] is reserved (possibly for pricing or metadata) - for charger in self.charging_hub.chargers: - for connector_idx in range(charger.number_of_connectors): - if action_index >= len(action): - break # Prevent index error if action list is shorter than expected - charging_power = action[action_index] - if charging_power > 0: - charging_vehicles = charger.charging_vehicles - if connector_idx < len(charging_vehicles): - vehicle = charging_vehicles[connector_idx] - vehicle.charging_power = charging_power - action_index += 1 - self.check_charging_power() - self.charging_hub.grid.reset_reward() def update_pricing_agent(self): - self.update_vehicles_status() - - if not self.charging_agent: - # TODO: do we need to recalculate it? - self.charging_hub.reward["missed"] = self.reward_computing() - - agent = self.pricing_agent - agent_name = agent.agent_name - config = agent.config - - if agent_name == "SAC": - agent.conduct_action(agent.action, self.charging_hub, self.env) - eval_ep = agent.do_evaluation_iterations - - if agent.time_for_critic_and_actor_to_learn() and not eval_ep: - for _ in range(agent.hyperparameters["learning_updates_per_learning_session"]): - agent.learn() - - mask = False if agent.global_step_number >= agent.environment._max_episode_steps else agent.done + # Delegated to PricingService for backward compatibility + self.pricing_service.update_pricing_agent() - agent.save_experience( - experience=( - agent.state, - agent.action, - agent.reward, - agent.next_state, - mask, - ) - ) - - elif agent_name == "DQN": - agent.conduct_action(agent.action, self.charging_hub, self.env) - - if agent.time_for_q_network_to_learn(): - for _ in range(agent.hyperparameters["learning_iterations"]): - agent.learn() - agent.save_experience( - experience=( - agent.state, - agent.action, - agent.reward, - agent.next_state, - False, - ) - ) - agent.global_step_number += 1 - def update_storage_agent(self): - - eval_ep = self.storage_agent.do_evaluation_iterations - action = self.storage_agent.descale_action( - self.storage_agent.action, self.charging_hub - ) - self.storage_agent.conduct_action(action, self.charging_hub, self.env, eval_ep=eval_ep) - if self.storage_agent.time_for_critic_and_actor_to_learn(): - for _ in range( - self.storage_agent.hyperparameters[ - "learning_updates_per_learning_session" - ] - ): - self.storage_agent.learn() - mask = ( - False - if self.storage_agent.episode_step_number_val - >= self.storage_agent.environment._max_episode_steps - else self.storage_agent.done - ) - # if not eval_ep: - - self.storage_agent.save_experience( - experience=( - self.storage_agent.state, - action, - self.storage_agent.reward, - self.storage_agent.next_state, - mask, - ) - ) - self.storage_agent.global_step_number += 1 - self.storage_agent.step_counter += 1 - - def update_charging_agent(self): - self.update_vehicles_status() - self.charging_hub.reward["missed"] = self.reward_computing() - - eval_ep = self.charging_agent.do_evaluation_iterations - self.charging_agent.conduct_action(self.charging_agent.action, self.charging_hub, self.env) - if 
self.charging_agent.time_for_critic_and_actor_to_learn(): - if not eval_ep: - for _ in range( - self.charging_agent.hyperparameters[ - "learning_updates_per_learning_session" - ] - ): - self.charging_agent.learn() - mask = ( - False - if self.charging_agent.episode_step_number_val - >= self.charging_agent.environment._max_episode_steps - else self.charging_agent.done - ) - # if not eval_ep: - action = self.charging_agent.descale_action(self.charging_agent.action, self.charging_hub) - self.charging_agent.save_experience( - experience=( - self.charging_agent.state, - action, - self.charging_agent.reward, - self.charging_agent.next_state, - mask, - ) - ) - self.charging_agent.global_step_number += 1 - self.charging_agent.step_counter += 1 def get_storage_schedule(self, storage_strategy, mode): """ @@ -1376,11 +1101,17 @@ def get_hub_generation_kW(self): t = self.env.now - generation_current_period = ( - self.non_dispatchable_generator.generation_profile_actual.loc[t][ - "pv_generation" - ] - ) + # Check if the time index exists in the generation profile + if t in self.non_dispatchable_generator.generation_profile_actual.index: + generation_current_period = ( + self.non_dispatchable_generator.generation_profile_actual.loc[t][ + "pv_generation" + ] + ) + else: + # If time index doesn't exist, return 0 (no generation) + # This handles cases where simulation runs longer than available data + generation_current_period = 0.0 return generation_current_period @@ -1526,10 +1257,6 @@ def request_process(self, request): request.energy_charged += ( request.charging_power / 60 ) # sim unit time is minutes so need to divide by 60 - request.calculate_profit_reward( - self.charging_hub.penalty_for_missed_kWh, - self.electricity_tariff, - ) if request.charging_power < 0: lg.warning( f"charging power of {request.id} is negative{request.charging_power}" diff --git a/simulation/operations/pricing_service.py b/simulation/operations/pricing_service.py new file mode 100644 index 0000000..edf988d --- /dev/null +++ b/simulation/operations/pricing_service.py @@ -0,0 +1,378 @@ +import logging +from typing import Any, Dict, List, Optional, Tuple +from simpy import Environment +import pandas as pd +from resources.configuration.configuration import Configuration +from utilities.rl_environments.rl_pricing_env import convert_to_vector +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + +# Set up logger for this module +logger = logging.getLogger(__name__) + +class PricingService: + """ + Encapsulates all pricing-related behavior away from Operator. + Accesses Operator state via the provided reference. 
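+    When an external agents_controller with a pricing agent is supplied, that agent
+    takes precedence; otherwise the service falls back to the operator's own pricing
+    agent and the decision request system.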
+ """ + + def __init__(self, operator: Any, agents_controller: Any | None = None): + self.op = operator + self.agents_controller = agents_controller + + # Register agents with the decision request system + auto_register_agents(operator) + + # Public APIs used by Operator + def take_dynamic_pricing_actions(self) -> None: + self.op.get_exp_free_grid_capacity() + self.op.update_vehicles_status() + + # Prefer external agent when available + used_agent = False + if getattr(self, "agents_controller", None) and getattr(self.agents_controller, "pricing", None): + context: Dict[str, Any] = {"charging_hub": self.op.charging_hub, "env": self.op.env} + action_dict = self.agents_controller.pricing_step(context) + if action_dict and "action" in action_dict: + action = action_dict["action"] + try: + import numpy as _np + if _np.isscalar(action): + action = convert_to_vector(int(action), h=1) + action_list = action.tolist() if hasattr(action, "tolist") else list(action) + except Exception: + action_list = [action] + + if not hasattr(self.op, "pricing_parameters") or self.op.pricing_parameters is None: + self.op.pricing_parameters = [0.0, 0.0] + if len(action_list) >= 1: + self.op.pricing_parameters[0] = action_list[0] + if len(action_list) >= 2: + if len(self.op.pricing_parameters) < 2: + self.op.pricing_parameters.append(0.0) + self.op.pricing_parameters[1] = action_list[1] + used_agent = True + + if not used_agent: + # Fallback to previous behavior + self.take_pricing_action() + + self._update_dynamic_price_history() + + def take_static_pricing_action(self) -> None: + self.op.get_exp_free_grid_capacity() + self.op.update_vehicles_status() + self._update_pricing_parameters() + self._update_static_price_history() + + def take_pricing_action(self) -> None: + # Check if we have a pricing agent and if it has an environment + if not self.op.pricing_agent: + return + + # Determine if this is an RL agent with environment or a rule-based/algorithm agent + if hasattr(self.op.pricing_agent, 'environment'): + # RL agent with environment - use existing logic + pricing_state = self.op.pricing_agent.environment.get_state(self.op.charging_hub, self.op.env) + self.op.pricing_agent.state = pricing_state + eval_ep = self.op.pricing_agent.do_evaluation_iterations + + pricing_mode = Configuration.instance().pricing_mode + agent_name = self.op.pricing_agent.agent_name + + if pricing_mode == "Discrete": + if agent_name == "DQN": + # Use decision request system for DQN pricing + action = self._get_pricing_decision_via_request(eval_ep=False) + self.op.pricing_agent.action = action + + if len(self.op.price_pairs[:, 1]) > 1: + vector_prices = convert_to_vector(self.op.pricing_agent.action) + else: + vector_prices = [self.op.pricing_agent.action] + final_pricing = self.op.pricing_agent.environment.get_final_prices_DQN(vector_prices) + for i, price in enumerate(final_pricing): + self.op.price_pairs[i, 1] = price + + elif agent_name == "SAC": + # Use decision request system for SAC pricing + action = self._get_pricing_decision_via_request(eval_ep) + self.op.pricing_agent.action = action + + rescaled_actions = self.op.pricing_agent.environment.rescale_action(self.op.pricing_agent.action) + number_of_power_options = len(self.op.price_pairs[:, 1]) + final_pricing = rescaled_actions[:number_of_power_options] + self.op.price_pairs[0, 1] = final_pricing[0] + self.op.price_pairs[1, 1] = min(final_pricing[1], 1.5) + + elif pricing_mode == "Continuous": + # Use decision request system for continuous pricing + action = 
self._get_pricing_decision_via_request(eval_ep) + self.op.pricing_agent.action = action + + rescaled_actions = self.op.pricing_agent.environment.rescale_action(self.op.pricing_agent.action) + + config = Configuration.instance() + if not config.dynamic_fix_term_pricing and config.capacity_pricing: + self.op.pricing_parameters[1] = rescaled_actions[0] + + elif config.dynamic_fix_term_pricing and not config.capacity_pricing: + self.op.pricing_parameters[0] = rescaled_actions[0] + if config.dynamic_parking_fee: + self.op.parking_fee = rescaled_actions[1] + + elif config.dynamic_fix_term_pricing and config.capacity_pricing: + self.op.pricing_parameters[0] = rescaled_actions[0] + self.op.pricing_parameters[1] = rescaled_actions[1] + + if config.limiting_grid_capa: + self.op.grid_capa = rescaled_actions[1] + + if config.dynamic_storage_scheduling: + self.op.storage_agent.action = [rescaled_actions[1]] + + # Use storage service instead of direct call + if hasattr(self.op, 'storage_service'): + self.op.storage_service.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) + else: + # Fallback for backward compatibility + self.op.conduct_storage_action(given_storage_action=[rescaled_actions[1]]) + else: + # Rule-based or algorithm agent - use agent decision system + try: + from simulation.operations.agent_decision_system import agent_decision_system + from utilities.rl_agents.interfaces import DecisionType + + # Create context for the agent + context = { + "eval_ep": False, + "charging_hub": self.op.charging_hub, + "env": self.op.env, + "pricing_mode": Configuration.instance().pricing_mode, + "current_demand": self.op.get_hub_load_kW(), + "grid_capacity": self.op.get_exp_free_grid_capacity().free_grid_capa_actual[0] if hasattr(self.op, 'free_grid_capa_actual') and len(self.op.free_grid_capa_actual) > 0 else 1000 + } + + # Get decision from agent decision system + decision = agent_decision_system.make_decision( + DecisionType.PRICING, + context, + timeout_seconds=30.0 + ) + + # Apply the decision + if decision and decision.action: + action = decision.action + + # Extract pricing information from action + if "energy_price" in action: + # Update energy prices + energy_price = action["energy_price"] + if len(self.op.price_pairs[:, 1]) > 1: + # Multiple price levels - energy_price should be a list + if isinstance(energy_price, list): + for i, price in enumerate(energy_price): + if i < len(self.op.price_pairs[:, 1]): + self.op.price_pairs[i, 1] = price + else: + # Single price - apply to all levels + for i in range(len(self.op.price_pairs[:, 1])): + self.op.price_pairs[i, 1] = energy_price + else: + # Single price level + if isinstance(energy_price, list): + self.op.price_pairs[0, 1] = energy_price[0] + else: + self.op.price_pairs[0, 1] = energy_price + + if "parking_fee" in action: + self.op.parking_fee = action["parking_fee"] + + if "pricing_parameters" in action: + params = action["pricing_parameters"] + if len(params) > 0: + self.op.pricing_parameters[0] = params[0] # Fixed term + if len(params) > 1: + self.op.pricing_parameters[1] = params[1] # Rate-based term + + logger.info(f"Applied pricing decision: {action}") + + except Exception as e: + logger.error(f"Error using agent decision system for pricing: {e}") + logger.warning("Falling back to default pricing") + # Fallback to default pricing + pass + + # Reset reward at the end + self.op.charging_hub.grid.reset_reward() + + def update_pricing_agent(self) -> None: + self.op.update_vehicles_status() + + if not self.op.charging_agent: + 
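+            # Without a dedicated charging agent, recompute the hub-level profit here
+            # so the pricing agent is rewarded on it.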
self.op.charging_hub.reward["profit"] = self.op.reward_computing() + + agent = self.op.pricing_agent + if not agent: + return + + # Check if this is an RL agent with agent_name or a rule-based agent + if hasattr(agent, 'agent_name'): + # RL agent - use existing logic + agent_name = agent.agent_name + + if agent_name == "SAC": + agent.conduct_action(agent.action) + eval_ep = agent.do_evaluation_iterations + + if agent.time_for_critic_and_actor_to_learn() and not eval_ep: + for _ in range(agent.hyperparameters["learning_updates_per_learning_session"]): + agent.learn() + + mask = False if agent.global_step_number >= agent.environment.MAX_EPISODE_STEPS else agent.done + + agent.save_experience( + experience=( + agent.state, + agent.action, + agent.reward, + agent.next_state, + mask, + ) + ) + + elif agent_name == "DQN": + agent.conduct_action(agent.action) + + if agent.time_for_q_network_to_learn(): + for _ in range(agent.hyperparameters["learning_iterations"]): + agent.learn() + + agent.save_experience( + experience=( + agent.state, + agent.action, + agent.reward, + agent.next_state, + False, + ) + ) + + # Update global step number for RL agents + if hasattr(agent, 'global_step_number'): + agent.global_step_number += 1 + else: + # Rule-based or algorithm agent - no learning needed + logger.info(f"Updated {agent.__class__.__name__} (no learning required)") + + def get_current_pricing_data(self): + from dataclasses import dataclass + + @dataclass + class PricingData: + energy_price: float + parking_price: float + pricing_mode: str + price_history: pd.DataFrame + + params = getattr(self.op, "pricing_parameters", [0.0]) + return PricingData( + energy_price=params[0] if len(params) > 0 else 0.0, + parking_price=self.op.parking_fee, + pricing_mode=self.op.pricing_mode, + price_history=self.op.price_history, + ) + + def _get_pricing_decision_via_request(self, eval_ep: bool = False) -> Any: + """ + Get pricing decision through the decision request system. 
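When the SAC pricing agent saves an experience above, the last tuple element is a mask that appears to be forced to `False` once `MAX_EPISODE_STEPS` is reached, so a time-limit truncation is stored as non-terminal and the critic keeps bootstrapping from the next state. A minimal, self-contained sketch of that convention (the `td_target` helper and the discount value are illustrative, not part of this patch):

```python
# Illustrative only: shows why the stored mask matters for the bootstrap target.
GAMMA = 0.99  # assumed discount factor for the sketch


def td_target(reward: float, next_state_value: float, terminal: bool) -> float:
    """One-step target: zero the bootstrap term only on genuine termination."""
    return reward + (0.0 if terminal else GAMMA * next_state_value)


# Episode cut off by MAX_EPISODE_STEPS: done may be True, but the saved mask is False,
# so the transition is treated as non-terminal.
print(td_target(reward=1.0, next_state_value=5.0, terminal=False))  # 5.95
# Genuine terminal transition.
print(td_target(reward=1.0, next_state_value=5.0, terminal=True))   # 1.0
```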
+ + Args: + eval_ep: Whether this is an evaluation episode + + Returns: + The pricing action/decision + """ + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "pricing_mode": Configuration.instance().pricing_mode, + "agent_name": self.op.pricing_agent.agent_name, + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.PRICING, + state=self.op.pricing_agent.state, + context=context, + metadata={ + "pricing_mode": context["pricing_mode"], + "agent_name": context["agent_name"] + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + # Fallback to direct agent call if request system fails + return self.op.pricing_agent.pick_action(eval_ep) + + # Internal helpers (ported from Operator) + def _update_dynamic_price_history(self) -> None: + if self.op.pricing_mode == "Discrete": + self._add_discrete_price_to_history() + elif self.op.pricing_mode == "Continuous": + self._add_continuous_price_to_history() + + def _update_static_price_history(self) -> None: + if self.op.pricing_mode == "Discrete": + self._add_discrete_price_to_history() + elif self.op.pricing_mode in ["Continuous", "ToU"]: + self._add_continuous_price_to_history() + + def _update_pricing_parameters(self) -> None: + if self.op.pricing_mode == "ToU": + self._update_tou_pricing() + elif self.op.pricing_mode == "perfect_info": + self._update_perfect_info_pricing() + + def _update_tou_pricing(self) -> None: + hour = self._get_current_hour() + max_price = Configuration.instance().max_price_ToU + self.op.pricing_parameters[0] = ( + self.op.electricity_tariff[hour] / max(self.op.electricity_tariff) * max_price + ) + + def _update_perfect_info_pricing(self) -> None: + hour = self._get_current_hour() + config = Configuration.instance() + if config.dynamic_fix_term_pricing: + self.op.pricing_parameters[1] = self.op.price_schedules[1][hour] + self.op.pricing_parameters[0] = self.op.price_schedules[0][hour] + else: + self.op.pricing_parameters[1] = self.op.price_schedules[hour] + + def _get_current_hour(self) -> int: + return int((self.op.env.now % 1440) / 60) + + def _add_discrete_price_to_history(self) -> None: + self.op.price_history = pd.concat([ + self.op.price_history, + pd.DataFrame(self.op.price_pairs[:, 1]).transpose(), + ]) + + def _add_continuous_price_to_history(self) -> None: + self.op.price_history = pd.concat([ + self.op.price_history, + pd.DataFrame([ + self.op.pricing_parameters[0], + self.op.pricing_parameters[1] + ]).transpose(), + ]) + + diff --git a/simulation/operations/storage_service.py b/simulation/operations/storage_service.py new file mode 100644 index 0000000..d2ed782 --- /dev/null +++ b/simulation/operations/storage_service.py @@ -0,0 +1,159 @@ +from typing import List, Dict, Any, Optional +from simulation.operations.agents_controller import AgentsController +from simulation.config_facade import ConfigFacade +from .decision_request_system import DecisionType, decision_system +from .decision_decorators import auto_register_agents + + +class StorageService: + """ + Service class for managing storage-related RL agent operations. + + Encapsulates all storage agent logic that was previously in the Operator class, + providing a clean separation of concerns and standardized interface for RL agents. 
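Both the pricing path above and the storage path below follow the same request/response round trip: build a context, `create_request(...)`, `process_request(request_id)`, use `response.action` if a response comes back, otherwise fall back to the agent's own `pick_action`. A usage sketch of that flow, using only the calls visible in this patch (the absolute module path is inferred from the relative import in `storage_service.py`; the helper function name is hypothetical):

```python
from simulation.operations.decision_request_system import DecisionType, decision_system


def request_pricing_action(operator, eval_ep: bool = False):
    """Round-trip through the decision request system, with the same fallback the service uses."""
    agent = operator.pricing_agent
    request_id = decision_system.create_request(
        agent_type=DecisionType.PRICING,
        state=agent.state,
        context={
            "eval_ep": eval_ep,
            "charging_hub": operator.charging_hub,
            "env": operator.env,
        },
        metadata={"agent_name": agent.agent_name},
    )
    response = decision_system.process_request(request_id)
    if response is not None:
        return response.action
    # Fall back to a direct agent call if the request system yields nothing.
    return agent.pick_action(eval_ep)
```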
+ """ + + def __init__(self, operator_instance: Any, agents_controller: Optional[AgentsController] = None, config_facade: Optional[ConfigFacade] = None): + """ + Initialize the StorageService. + + Args: + operator_instance: Reference to the main operator instance + agents_controller: Controller for managing RL agents + config_facade: Facade for accessing configuration values + """ + self.op = operator_instance + self.agents_controller = agents_controller + self.config = config_facade or ConfigFacade() + + # Register agents with the decision request system + auto_register_agents(operator_instance) + + def take_storage_action(self) -> None: + """ + Take storage action using the RL storage agent. + """ + if self.agents_controller and self.agents_controller.storage: + # Use the controller to get storage action + context = { + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + action_result = self.agents_controller.storage_step(context) + if action_result: + self.op.storage_agent.action = action_result.get("storage_action") + else: + # Use decision request system for storage decisions + action = self._get_storage_decision_via_request() + self.op.storage_agent.action = action + + def conduct_storage_action(self, given_storage_action: Optional[List[float]] = None) -> None: + """ + Execute the storage action by applying it to the electric storage system. + + Args: + given_storage_action: Optional storage action to use instead of agent's action + """ + if given_storage_action: + storage_power = given_storage_action[0] + else: + storage_power = self.op.storage_agent.action[0] + + if storage_power >= 0: + self.op.charging_hub.electric_storage.charge_yn = 1 + self.op.charging_hub.electric_storage.charging_power = storage_power + self.op.charging_hub.electric_storage.discharge_yn = 0 + self.op.charging_hub.electric_storage.discharging_power = 0 + elif storage_power < 0: + self.op.charging_hub.electric_storage.charge_yn = 0 + self.op.charging_hub.electric_storage.charging_power = 0 + self.op.charging_hub.electric_storage.discharge_yn = 1 + self.op.charging_hub.electric_storage.discharging_power = -storage_power + + self.op.check_storage(given_storage_action=given_storage_action) + + def update_storage_agent(self) -> None: + """ + Update the storage agent with new state and experience. + """ + eval_ep = self.op.storage_agent.do_evaluation_iterations + action = self.op.storage_agent.descale_action( + self.op.storage_agent.action, self.op.charging_hub + ) + self.op.storage_agent.conduct_action(action, self.op.charging_hub, self.op.env, eval_ep=eval_ep) + if self.op.storage_agent.time_for_critic_and_actor_to_learn(): + for _ in range( + self.op.storage_agent.hyperparameters[ + "learning_updates_per_learning_session" + ] + ): + self.op.storage_agent.learn() + mask = ( + False + if self.op.storage_agent.episode_step_number_val + >= self.op.storage_agent.environment.MAX_EPISODE_STEPS + else self.op.storage_agent.done + ) + # if not eval_ep: + + self.op.storage_agent.save_experience( + experience=( + self.op.storage_agent.state, + action, + self.op.storage_agent.reward, + self.op.storage_agent.next_state, + mask, + ) + ) + self.op.storage_agent.global_step_number += 1 + self.op.storage_agent.step_counter += 1 + + def _get_storage_decision_via_request(self) -> Any: + """ + Get storage decision through the decision request system. 
+ + Returns: + The storage action/decision + """ + # Get current state from environment + storage_state = self.op.charging_hub.storage_agent.environment.get_state( + self.op.charging_hub, self.op.env + ) + self.op.storage_agent.state = storage_state + + eval_ep = self.op.storage_agent.do_evaluation_iterations + self.op.storage_agent.episode_step_number_val = 0 + + # Create context for the decision request + context = { + "eval_ep": eval_ep, + "charging_hub": self.op.charging_hub, + "env": self.op.env + } + + # Create and process decision request + request_id = decision_system.create_request( + agent_type=DecisionType.STORAGE, + state=self.op.storage_agent.state, + context=context, + metadata={ + "agent_name": getattr(self.op.storage_agent, "agent_name", "Unknown") + } + ) + + # Process the request + response = decision_system.process_request(request_id) + + if response: + return response.action + else: + # Fallback to direct agent call if request system fails + # Handle different pick_action signatures + import inspect + sig = inspect.signature(self.op.storage_agent.pick_action) + if len(sig.parameters) > 2: # Method expects eval_ep and charging_hub parameters + return self.op.storage_agent.pick_action(eval_ep, self.op.charging_hub) + elif len(sig.parameters) > 1: # Method expects only eval_ep parameter + return self.op.storage_agent.pick_action(eval_ep) + else: # Method doesn't expect eval_ep parameter + return self.op.storage_agent.pick_action() diff --git a/simulation/preferences/vehicle.py b/simulation/preferences/vehicle.py index f2e93d1..52f408f 100644 --- a/simulation/preferences/vehicle.py +++ b/simulation/preferences/vehicle.py @@ -46,7 +46,7 @@ def __init__( self.energy_requested = self.adjust_energy_request( energy_requested_input ) # energy_requested # kWh - self.raw_energy_demand = self.energy_requested.copy() + self.raw_energy_demand = self.energy_requested # No need for .copy() on float self.energy_requested = min(self.energy_requested, self.park_duration / 60 * 50) if pd.isna(self.energy_requested): self.energy_requested = 0 @@ -109,14 +109,33 @@ def adjust_energy_request(self, energy_requested_input): :return: """ + # Handle case where arrival_period is in the future or invalid + if self.arrival_period >= self.sim_time: + # Vehicle hasn't arrived yet, no energy request + return 0.0 + + # Handle case where departure_period is in the past + if self.departure_period <= 0: + # Vehicle has already departed, no energy request + return 0.0 + + # Handle case where park_duration is invalid + if self.park_duration <= 0: + return 0.0 + if self.departure_period <= self.sim_time: + # Vehicle will depart before simulation ends, use full energy request energy_request = energy_requested_input elif self.departure_period > self.sim_time: - energy_request = energy_requested_input * ( - (self.sim_time - self.arrival_period) / self.park_duration - ) + # Vehicle will stay beyond simulation end, prorate the energy request + time_in_simulation = max(0, self.sim_time - self.arrival_period) + energy_request = energy_requested_input * (time_in_simulation / self.park_duration) + else: + # Fallback case + energy_request = energy_requested_input - return energy_request + # Ensure energy request is non-negative + return max(0.0, energy_request) def set_average_power_requirement_level(self): laxity = self.energy_requested / (self.park_duration / 60) @@ -243,11 +262,4 @@ def update_status(self): self.remaining_park_duration = max(self.departure_period - self.env.now, 1) # self.remaining_laxity = 
self.remaining_energy_deficit/max(self.remaining_park_duration,1) # if self.mode in ['Connected']: - # print(f'id={self.id}, power={self.charging_power}') - - def reset_profit_reward(self): - self.profit_reward = 0 - - def calculate_profit_reward(self, energy_price, electricity_tariff): - # hour = int((self.env.now % 1440 - self.env.now % 60) / 60) - self.profit_reward += self.charging_power / 60 * (energy_price) + # print(f'id={self.id}, power={self.charging_power}') \ No newline at end of file diff --git a/utilities/agent_factory.py b/utilities/agent_factory.py new file mode 100644 index 0000000..7cd4d55 --- /dev/null +++ b/utilities/agent_factory.py @@ -0,0 +1,157 @@ +""" +Agent Factory Module + +This module handles the creation and configuration of different types of agents +for the EVCC simulation framework. +""" + +from typing import Optional +from resources.configuration.configuration import Configuration +from utilities.rl_agents.interfaces import DecisionType, AgentType +from utilities.rl_agents.rule_based_agents import ( + RuleBasedPricingAgent, RuleBasedChargingAgent, RuleBasedStorageAgent +) +from utilities.rl_agents.algorithm_agents import ( + AlgorithmChargingAgent, AlgorithmRoutingAgent, AlgorithmStorageAgent +) +from utilities.rl_agents.agents.actor_critic_agents.SAC import SAC +from resources.configuration.SAC_configuration import pricing_config +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +def is_agent_learnable(agent_type: str) -> bool: + """ + Determine if an agent type is learnable (RL agent). + + Args: + agent_type: String representation of agent type + + Returns: + True if the agent is learnable (RL agent), False otherwise + """ + learnable_types = ["RL_SAC", "RL_DQN", "RL_DDPG"] + return agent_type.upper() in learnable_types + + +def create_agent(decision_type: str, agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """ + Create an agent based on decision type and agent type. + + Args: + decision_type: Type of decision (pricing, charging, storage, routing) + agent_type: Type of agent (RL_SAC, RULE_BASED, HEURISTIC, etc.) 
+ algorithm: Algorithm name for heuristic agents + strategy: Strategy name for agents that support different strategies + + Returns: + Agent instance + + Raises: + ValueError: If decision type or agent type is not supported + """ + if decision_type == "pricing": + return _create_pricing_agent(agent_type, strategy) + elif decision_type == "charging": + return _create_charging_agent(agent_type, algorithm, strategy) + elif decision_type == "storage": + return _create_storage_agent(agent_type, algorithm, strategy) + elif decision_type == "routing": + return _create_routing_agent(agent_type, algorithm, strategy) + else: + raise ValueError(f"Unsupported decision type: {decision_type}") + + +def _create_pricing_agent(agent_type: str, strategy: Optional[str] = None): + """Create a pricing agent.""" + if agent_type == "RL_SAC": + return _create_sac_pricing_agent() + elif agent_type == "HEURISTIC": + strategy = strategy or "time_of_use" + return RuleBasedPricingAgent(strategy=strategy) + elif agent_type == "RULE_BASED": + return RuleBasedPricingAgent(strategy=strategy or "time_of_use") + else: + raise ValueError(f"Unsupported agent type for pricing: {agent_type}") + + +def _create_charging_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a charging agent.""" + if agent_type == "HEURISTIC": + return AlgorithmChargingAgent(algorithm=algorithm or "first_come_first_served") + elif agent_type == "RULE_BASED": + return RuleBasedChargingAgent(strategy=strategy or "first_come_first_served") + else: + raise ValueError(f"Unsupported agent type for charging: {agent_type}") + + +def _create_storage_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a storage agent.""" + if agent_type == "HEURISTIC": + return AlgorithmStorageAgent(algorithm=algorithm or "peak_shaving") + elif agent_type == "RULE_BASED": + return RuleBasedStorageAgent(strategy=strategy or "peak_shaving") + else: + raise ValueError(f"Unsupported agent type for storage: {agent_type}") + + +def _create_routing_agent(agent_type: str, algorithm: Optional[str] = None, + strategy: Optional[str] = None): + """Create a routing agent.""" + if agent_type == "HEURISTIC": + return AlgorithmRoutingAgent(algorithm=algorithm or "lowest_occupancy_first") + elif agent_type == "RULE_BASED": + return AlgorithmRoutingAgent(algorithm=strategy or "lowest_occupancy_first") + else: + raise ValueError(f"Unsupported agent type for routing: {agent_type}") + + +def _create_sac_pricing_agent(): + """Create and configure a SAC pricing agent.""" + config = Configuration.instance() + + # Configure pricing environment for RL agent + pricing_config.number_chargers = config.facility_size + pricing_config.maximum_power = 50 + pricing_config.maximum_grid_usage = 2000 + pricing_config.number_power_options = len(config.energy_prices) + pricing_config.environment = PricingEnv(config=pricing_config, DQN=False) + pricing_config.learnt_network = config.evaluation_after_training + pricing_config.evaluation_after_training = config.evaluation_after_training + + return SAC(pricing_config) + + +def get_agent_configuration(config: Configuration) -> dict: + """ + Get agent configuration from the main configuration. 
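A short usage sketch for the factory defined above; the decision-type and agent-type strings are exactly the ones `create_agent()` dispatches on in this patch, and the chosen algorithms/strategies come from the agent classes added elsewhere in the diff:

```python
from utilities.agent_factory import create_agent, is_agent_learnable

tou_pricing = create_agent("pricing", "RULE_BASED", strategy="time_of_use")
edf_charging = create_agent("charging", "HEURISTIC", algorithm="earliest_deadline_first")
peak_storage = create_agent("storage", "RULE_BASED", strategy="peak_shaving")

# Only the RL variants are treated as learnable and routed through training.
assert is_agent_learnable("RL_SAC") and not is_agent_learnable("RULE_BASED")
```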
+ + Args: + config: Configuration instance + + Returns: + Dictionary containing agent configurations + """ + return { + "pricing": { + "agent_type": getattr(config, 'default_agent_types', {}).get("pricing"), + "strategy": getattr(config, 'default_strategies', {}).get("pricing", "time_of_use") + }, + "charging": { + "agent_type": getattr(config, 'default_agent_types', {}).get("charging"), + "algorithm": getattr(config, 'default_algorithms', {}).get("charging", "first_come_first_served"), + "strategy": getattr(config, 'default_strategies', {}).get("charging", "first_come_first_served") + }, + "storage": { + "agent_type": getattr(config, 'default_agent_types', {}).get("storage"), + "algorithm": getattr(config, 'default_algorithms', {}).get("storage", "peak_shaving"), + "strategy": getattr(config, 'default_strategies', {}).get("storage", "peak_shaving") + }, + "routing": { + "agent_type": getattr(config, 'default_agent_types', {}).get("routing"), + "algorithm": getattr(config, 'default_algorithms', {}).get("routing", "lowest_occupancy_first"), + "strategy": getattr(config, 'default_strategies', {}).get("routing", "lowest_occupancy_first") + } + } diff --git a/utilities/hyperparameter_tuner.py b/utilities/hyperparameter_tuner.py new file mode 100644 index 0000000..b429ddd --- /dev/null +++ b/utilities/hyperparameter_tuner.py @@ -0,0 +1,148 @@ +""" +Hyperparameter Tuner Module + +This module handles hyperparameter tuning for learnable agents in the EVCC simulation framework. +""" + +from typing import Dict, Any +from resources.configuration.configuration import Configuration +from utilities.training_manager import run_standard_training +import pandas as pd +import numpy as np + + +def find_best_parameters(agent, config: Configuration) -> None: + """ + Find best hyperparameters for the agent through grid search. 
+ + Args: + agent: The agent instance to tune + config: Configuration instance + """ + print("Starting hyperparameter tuning...") + + # Try to read existing training results + try: + training_results = pd.read_csv(f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}.csv') + except: + training_results = pd.DataFrame(columns=['learning_rate', 'batch_size', 'tau', 'result']) + + best_results = -10000000000 + best_parameters = {'learning_rate': 0, 'batch_size': 0, 'tau': 0} + + # Hyperparameter grid to search + learning_rates = [5e-5, 1e-4, 5e-4, 1e-3] + batch_sizes = [64, 256, 512] + tau_values = [0.05, 0.1] + + total_combinations = len(learning_rates) * len(batch_sizes) * len(tau_values) + current_combination = 0 + + for lr in learning_rates: + for bs in batch_sizes: + for tau in tau_values: + current_combination += 1 + print(f"Testing combination {current_combination}/{total_combinations}: lr={lr}, bs={bs}, tau={tau}") + + # Update agent hyperparameters + if hasattr(agent, 'hyperparameters'): + agent.hyperparameters['batch_size'] = bs + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['learning_rate'] = lr + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['learning_rate'] = lr + agent.hyperparameters['Critic']['tau'] = tau + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['tau'] = tau + agent.hyperparameters['min_steps_before_learning'] = max(bs, 256) + + # Run training experiment + try: + mean_reward = run_standard_training(agent, "pricing", config, return_rewards=True) + + # Track results + hyperparameters = {'learning_rate': lr, 'batch_size': bs, 'tau': tau} + if np.array(mean_reward).mean() > best_results: + best_results = np.array(mean_reward).mean() + best_parameters = hyperparameters + print(f"New best result: {best_results} with parameters: {best_parameters}") + + # Save results + results_dict = {'result': mean_reward} + new_row = pd.DataFrame([[lr, bs, tau, mean_reward]], columns=training_results.columns) + training_results = pd.concat([new_row, training_results], ignore_index=True) + + # Save to CSV + training_results.to_csv( + f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}_tuning.csv', + index=False + ) + + print(f'Parameters: {hyperparameters}, Results: {results_dict}') + print(f'Best so far: {best_results}, Best parameters: {best_parameters}') + + except Exception as e: + print(f"Error during hyperparameter tuning for {hyperparameters}: {e}") + continue + + print(f"\nHyperparameter tuning completed!") + print(f"Best result: {best_results}") + print(f"Best parameters: {best_parameters}") + + # Save final best parameters + best_params_df = pd.DataFrame([best_parameters]) + best_params_df.to_csv( + f'{config.OUTPUT_DATA_PATH}best_parameters_{agent.config.name}.csv', + index=False + ) + + +def get_hyperparameter_grid() -> Dict[str, list]: + """ + Get the default hyperparameter grid for tuning. + + Returns: + Dictionary containing hyperparameter grids + """ + return { + 'learning_rates': [5e-5, 1e-4, 5e-4, 1e-3], + 'batch_sizes': [64, 256, 512], + 'tau_values': [0.05, 0.1] + } + + +def update_agent_hyperparameters(agent, hyperparameters: Dict[str, Any]) -> None: + """ + Update agent hyperparameters safely. 
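`find_best_parameters` above is a plain grid search over the learning-rate, batch-size, and tau values returned by `get_hyperparameter_grid`. A compact, self-contained sketch of that loop; the `evaluate` function is a placeholder for `run_standard_training(..., return_rewards=True)` so the snippet runs without the simulator:

```python
import itertools

import numpy as np

grid = {
    "learning_rates": [5e-5, 1e-4, 5e-4, 1e-3],
    "batch_sizes": [64, 256, 512],
    "tau_values": [0.05, 0.1],
}


def evaluate(lr: float, bs: int, tau: float) -> float:
    """Stand-in for the training run; returns a deterministic pseudo-score."""
    rng = np.random.default_rng(hash((lr, bs, tau)) % 2**32)
    return float(rng.normal(loc=-lr * 1e4 + bs * 0.01, scale=1.0))


best_score, best_params = -np.inf, None
for lr, bs, tau in itertools.product(
    grid["learning_rates"], grid["batch_sizes"], grid["tau_values"]
):
    score = evaluate(lr, bs, tau)
    if score > best_score:
        best_score = score
        best_params = {"learning_rate": lr, "batch_size": bs, "tau": tau}

print(best_params, best_score)
```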
+ + Args: + agent: The agent instance to update + hyperparameters: Dictionary of hyperparameters to update + """ + if not hasattr(agent, 'hyperparameters'): + print("Warning: Agent does not have hyperparameters attribute") + return + + # Update batch size + if 'batch_size' in hyperparameters: + agent.hyperparameters['batch_size'] = hyperparameters['batch_size'] + + # Update learning rates + if 'learning_rate' in hyperparameters: + lr = hyperparameters['learning_rate'] + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['learning_rate'] = lr + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['learning_rate'] = lr + + # Update tau values + if 'tau' in hyperparameters: + tau = hyperparameters['tau'] + if 'Actor' in agent.hyperparameters: + agent.hyperparameters['Actor']['tau'] = tau + if 'Critic' in agent.hyperparameters: + agent.hyperparameters['Critic']['tau'] = tau + + # Update min steps before learning + if 'batch_size' in hyperparameters: + agent.hyperparameters['min_steps_before_learning'] = max(hyperparameters['batch_size'], 256) diff --git a/utilities/rl_agents/adapters/charging_adapter.py b/utilities/rl_agents/adapters/charging_adapter.py new file mode 100644 index 0000000..a1c1860 --- /dev/null +++ b/utilities/rl_agents/adapters/charging_adapter.py @@ -0,0 +1,80 @@ +from typing import Any, Dict, List, Optional +from utilities.rl_agents.interfaces import ChargingAgent + + +class ChargingEnvAgentAdapter(ChargingAgent): + """ + Adapter to wrap existing RL charging agents and environments to conform to the ChargingAgent interface. + + This adapter provides a standardized interface for charging agents, allowing easy swapping + of different RL algorithms while maintaining compatibility with the charging service. + """ + + def __init__(self, rl_agent: Any, charging_env: Any): + """ + Initialize the charging agent adapter. + + Args: + rl_agent: The underlying RL agent (e.g., SAC, DQN) + charging_env: The charging environment (e.g., ChargingHubInvestmentEnv) + """ + self.rl_agent = rl_agent + self.charging_env = charging_env + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state = self.charging_env.reset() + self.rl_agent.reset_game() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + charging_hub = context.get("charging_hub") + env = context.get("env") + self.current_state = self.charging_env.get_state(charging_hub, env) + self.rl_agent.state = self.current_state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select a charging action based on current state and vehicles. 
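A quick check of `update_agent_hyperparameters` against a stand-in agent object; the nested `'Actor'`/`'Critic'` layout matches the SAC-style hyperparameter dictionaries the tuner assumes, and the stand-in itself is hypothetical:

```python
from types import SimpleNamespace

from utilities.hyperparameter_tuner import update_agent_hyperparameters

agent = SimpleNamespace(
    hyperparameters={
        "batch_size": 64,
        "min_steps_before_learning": 256,
        "Actor": {"learning_rate": 3e-4, "tau": 0.05},
        "Critic": {"learning_rate": 3e-4, "tau": 0.05},
    }
)

update_agent_hyperparameters(agent, {"learning_rate": 1e-3, "batch_size": 512, "tau": 0.1})

assert agent.hyperparameters["Critic"]["learning_rate"] == 1e-3
assert agent.hyperparameters["Actor"]["tau"] == 0.1
assert agent.hyperparameters["min_steps_before_learning"] == 512  # max(batch_size, 256)
```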
+ + Args: + vehicles: List of vehicles to consider for charging + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected charging action + """ + charging_hub = context.get("charging_hub") + eval_ep = self.rl_agent.do_evaluation_iterations + self.rl_agent.episode_step_number_val = 0 + + # Get action from the RL agent + action_raw = self.rl_agent.pick_action(eval_ep) + self.current_action = action_raw + + # Rescale action if needed + rescaled_action = self.rl_agent.rescale_action(action_raw) + + return {"charging_action": rescaled_action} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # The RL agent's internal learn method is typically called + # by the agent itself after its conduct_action. + # If explicit learning is needed, it would be handled here. + pass diff --git a/utilities/rl_agents/adapters/gym_agent_adapter.py b/utilities/rl_agents/adapters/gym_agent_adapter.py new file mode 100644 index 0000000..8a70c9c --- /dev/null +++ b/utilities/rl_agents/adapters/gym_agent_adapter.py @@ -0,0 +1,180 @@ +from typing import Any, Dict, List, Optional, Union +import numpy as np +from utilities.rl_agents.interfaces import PricingAgent, ChargingAgent, StorageAgent +from utilities.rl_environments.evch_gym_env import EVCHGymEnv, AgentType + + +class GymAgentAdapter: + """ + Adapter for standard gym-compatible RL agents to work with EVCH simulation. + + This adapter allows any gym-compatible RL agent (Stable Baselines3, RLlib, etc.) + to be used with the EVCH simulation by providing a standardized interface. + """ + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + """ + Initialize the gym agent adapter. + + Args: + gym_env: The EVCH gym environment + gym_agent: The gym-compatible RL agent (must have predict() method) + """ + self.gym_env = gym_env + self.gym_agent = gym_agent + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + # Validate that the agent has the required methods + if not hasattr(self.gym_agent, 'predict'): + raise ValueError("Gym agent must have a 'predict' method") + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state, _ = self.gym_env.reset() + if hasattr(self.gym_agent, 'reset'): + self.gym_agent.reset() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + # Set the simulation context in the gym environment + charging_hub = context.get("charging_hub") + sim_env = context.get("env") + if charging_hub and sim_env: + self.gym_env.set_simulation_context(charging_hub, sim_env) + + # Get current state from the gym environment + self.current_state = self.gym_env._get_state() + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select an action using the gym agent. 
+ + Args: + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected action + """ + # Update state first + self.update_state(context) + + # Use the gym agent to predict action + if hasattr(self.gym_agent, 'predict'): + # Standard gym agent interface + action, _ = self.gym_agent.predict(self.current_state, deterministic=True) + elif hasattr(self.gym_agent, 'act'): + # Alternative interface + action = self.gym_agent.act(self.current_state) + else: + # Fallback: assume agent is callable + action = self.gym_agent(self.current_state) + + self.current_action = action + + # Return action in the format expected by the service + if self.gym_env.agent_type == AgentType.PRICING: + return {"pricing_parameters": action} + elif self.gym_env.agent_type == AgentType.CHARGING: + return {"charging_action": action} + elif self.gym_env.agent_type == AgentType.STORAGE: + return {"storage_action": action} + else: + return {"action": action} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # Most gym agents handle learning internally during training + # This method is called for compatibility but may not be used + pass + + def train(self, total_timesteps: int = 1000) -> None: + """ + Train the gym agent. + + Args: + total_timesteps: Number of timesteps to train for + """ + if hasattr(self.gym_agent, 'learn'): + self.gym_agent.learn(total_timesteps=total_timesteps) + else: + raise NotImplementedError("Gym agent does not have a 'learn' method") + + +class GymPricingAgentAdapter(GymAgentAdapter, PricingAgent): + """Adapter for gym agents used as pricing agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.PRICING: + raise ValueError("Gym environment must be configured for pricing agent") + + +class GymChargingAgentAdapter(GymAgentAdapter, ChargingAgent): + """Adapter for gym agents used as charging agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.CHARGING: + raise ValueError("Gym environment must be configured for charging agent") + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on vehicles and context. 
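`GymAgentAdapter.select_action` above accepts any policy object that exposes a Stable-Baselines3-style `predict()`, an `act()`, or is simply callable. A standalone reproduction of that duck-typed dispatch with two stub policies (the stub class names are hypothetical):

```python
import numpy as np


class SB3StylePolicy:
    """Stub mimicking the (action, state) return shape of an SB3 predict()."""

    def predict(self, obs, deterministic=True):
        return np.zeros(2), None


class CallablePolicy:
    def __call__(self, obs):
        return np.ones(2)


def pick_action(policy, obs):
    # Same precedence the adapter uses: predict() first, then act(), then __call__.
    if hasattr(policy, "predict"):
        action, _ = policy.predict(obs, deterministic=True)
    elif hasattr(policy, "act"):
        action = policy.act(obs)
    else:
        action = policy(obs)
    return action


obs = np.zeros(4)
print(pick_action(SB3StylePolicy(), obs))  # [0. 0.]
print(pick_action(CallablePolicy(), obs))  # [1. 1.]
```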
+ + Args: + vehicles: List of vehicles to consider for charging + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected charging action + """ + # Add vehicles to context for the gym environment + context_with_vehicles = context.copy() + context_with_vehicles["vehicles"] = vehicles + + return super().select_action(context_with_vehicles) + + +class GymStorageAgentAdapter(GymAgentAdapter, StorageAgent): + """Adapter for gym agents used as storage agents.""" + + def __init__(self, gym_env: EVCHGymEnv, gym_agent: Any): + super().__init__(gym_env, gym_agent) + if gym_env.agent_type != AgentType.STORAGE: + raise ValueError("Gym environment must be configured for storage agent") + + +# Factory functions for easy adapter creation +def create_gym_pricing_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymPricingAgentAdapter: + """Create a gym pricing agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_pricing_env + gym_env = make_pricing_env(config_dict, **kwargs) + return GymPricingAgentAdapter(gym_env, gym_agent) + + +def create_gym_charging_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymChargingAgentAdapter: + """Create a gym charging agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_charging_env + gym_env = make_charging_env(config_dict, **kwargs) + return GymChargingAgentAdapter(gym_env, gym_agent) + + +def create_gym_storage_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymStorageAgentAdapter: + """Create a gym storage agent adapter.""" + from utilities.rl_environments.evch_gym_env import make_storage_env + gym_env = make_storage_env(config_dict, **kwargs) + return GymStorageAgentAdapter(gym_env, gym_agent) diff --git a/utilities/rl_agents/adapters/pricing_adapter.py b/utilities/rl_agents/adapters/pricing_adapter.py new file mode 100644 index 0000000..381d4ec --- /dev/null +++ b/utilities/rl_agents/adapters/pricing_adapter.py @@ -0,0 +1,42 @@ +from typing import Any, Dict + +from utilities.rl_agents.interfaces import PricingAgent as PricingAgentInterface +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +class PricingEnvAgentAdapter(PricingAgentInterface): + """ + Adapter that wires an underlying RL policy to the PricingEnv API. 
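The gym adapters above are intended to let off-the-shelf policies drive the simulation. A sketch of one possible wiring with Stable-Baselines3; it assumes `stable-baselines3` is installed and that `make_pricing_env()` accepts a plain config dict, neither of which is fixed by this patch, and the empty `config_dict` is a placeholder:

```python
from stable_baselines3 import SAC as SB3SAC

from utilities.rl_agents.adapters.gym_agent_adapter import GymPricingAgentAdapter
from utilities.rl_environments.evch_gym_env import make_pricing_env

config_dict = {}  # placeholder configuration
gym_env = make_pricing_env(config_dict)
sb3_agent = SB3SAC("MlpPolicy", gym_env)

# Agent and adapter share the same environment instance.
adapter = GymPricingAgentAdapter(gym_env, sb3_agent)
adapter.train(total_timesteps=10_000)  # delegates to sb3_agent.learn(...)
```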
+ The underlying policy must implement: reset(), act(obs) -> action, learn(transition) + """ + + def __init__(self, env: PricingEnv, policy: Any): + self.env = env + self.policy = policy + self._last_obs = None + + def reset(self) -> None: + if hasattr(self.policy, "reset"): + self.policy.reset() + self._last_obs = self.env.reset() + + def update_state(self, context: Dict[str, Any]) -> None: + charging_hub = context.get("charging_hub") + sim_env = context.get("env") + self._last_obs = self.env.get_state(charging_hub, sim_env) + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + if self._last_obs is None: + self.update_state(context) + if hasattr(self.policy, "act"): + action = self.policy.act(self._last_obs) + else: + # Fallback: assume policy is callable + action = self.policy(self._last_obs) + return {"action": action} + + def learn(self, transition: Dict[str, Any]) -> None: + if hasattr(self.policy, "learn"): + self.policy.learn(transition) + + diff --git a/utilities/rl_agents/adapters/storage_adapter.py b/utilities/rl_agents/adapters/storage_adapter.py new file mode 100644 index 0000000..0dd5266 --- /dev/null +++ b/utilities/rl_agents/adapters/storage_adapter.py @@ -0,0 +1,76 @@ +from typing import Any, Dict, List, Optional +from utilities.rl_agents.interfaces import StorageAgent + + +class StorageEnvAgentAdapter(StorageAgent): + """ + Adapter to wrap existing RL storage agents and environments to conform to the StorageAgent interface. + + This adapter provides a standardized interface for storage agents, allowing easy swapping + of different RL algorithms while maintaining compatibility with the storage service. + """ + + def __init__(self, rl_agent: Any, storage_env: Any): + """ + Initialize the storage agent adapter. + + Args: + rl_agent: The underlying RL agent (e.g., SAC, DQN) + storage_env: The storage environment (e.g., StorageEnv) + """ + self.rl_agent = rl_agent + self.storage_env = storage_env + self.current_state = None + self.current_action = None + self.current_reward = None + self.next_state = None + self.done = False + + def reset(self) -> None: + """Reset the agent and environment.""" + self.current_state = self.storage_env.reset() + self.rl_agent.reset_game() + + def update_state(self, context: Dict[str, Any]) -> None: + """ + Update the agent's state based on the current context. + + Args: + context: Dictionary containing charging_hub and env + """ + charging_hub = context.get("charging_hub") + env = context.get("env") + self.current_state = self.storage_env.get_state(charging_hub, env) + self.rl_agent.state = self.current_state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select a storage action based on current state. + + Args: + context: Dictionary containing charging_hub and env + + Returns: + Dictionary containing the selected storage action + """ + charging_hub = context.get("charging_hub") + eval_ep = self.rl_agent.do_evaluation_iterations + self.rl_agent.episode_step_number_val = 0 + + # Get action from the RL agent + action_raw = self.rl_agent.pick_action(eval_ep) + self.current_action = action_raw + + return {"storage_action": action_raw} + + def learn(self, transition: Dict[str, Any]) -> None: + """ + Learn from the transition experience. + + Args: + transition: Dictionary containing state, action, reward, next_state, done + """ + # The RL agent's internal learn method is typically called + # by the agent itself after its conduct_action. + # If explicit learning is needed, it would be handled here. 
+ pass diff --git a/utilities/rl_agents/agents/Base_Agent.py b/utilities/rl_agents/agents/Base_Agent.py index de5a58b..faaff8b 100644 --- a/utilities/rl_agents/agents/Base_Agent.py +++ b/utilities/rl_agents/agents/Base_Agent.py @@ -256,11 +256,9 @@ def run_n_episodes( # if self.config.save_model: self.locally_save_policy() return self.game_full_episode_scores, self.rolling_results, time_taken - def conduct_action(self, action, charging_hub, env): + def conduct_action(self, action): """Conducts an action in the environment""" - self.next_state, self.reward, self.done, _ = self.environment.step( - action, charging_hub, env - ) + self.next_state, self.reward, self.done, _ = self.environment.step(action) self.total_episode_score_so_far += self.reward if self.hyperparameters["clip_rewards"]: self.reward = max(min(self.reward, 1.0), -1.0) diff --git a/utilities/rl_agents/agents/actor_critic_agents/SAC.py b/utilities/rl_agents/agents/actor_critic_agents/SAC.py index 226247f..6e69333 100644 --- a/utilities/rl_agents/agents/actor_critic_agents/SAC.py +++ b/utilities/rl_agents/agents/actor_critic_agents/SAC.py @@ -229,8 +229,7 @@ def reset_game(self): if self.add_extra_noise: self.noise.reset() - def step(self, charging_hub, env): - pass + def step(self): """Runs an episode on the game, saving the experience and running a learning step if appropriate""" eval_ep = ( self.episode_number % TRAINING_EPISODES_PER_EVAL_EPISODE == 0 @@ -238,8 +237,8 @@ def step(self, charging_hub, env): ) self.episode_step_number_val = 0 # while not self.done: - self.action = self.pick_action(eval_ep, charging_hub) - self.conduct_action(self.action, charging_hub, env) + self.action = self.pick_action(eval_ep) + self.conduct_action(self.action) if self.time_for_critic_and_actor_to_learn(): for _ in range( self.hyperparameters["learning_updates_per_learning_session"] @@ -257,110 +256,22 @@ def step(self, charging_hub, env): self.state = self.next_state self.global_step_number += 1 - # print(self.pick_action(eval_ep, charging_hub, state=self.environment.get_state(None, None))) - def rescale_action(self, action): return ( action * (self.action_range[1] - self.action_range[0]) / 2.0 + (self.action_range[1] + self.action_range[0]) / 2.0 ) - def descale_action(self, action, charging_hub): + def descale_action(self, action): actions = (action - ((self.action_range[1] + self.action_range[0]) / 2.0)) / ( (self.action_range[1] - self.action_range[0]) / 2 ) return actions - def penalty_action(self, action, charging_hub): - vehicle_state = self.state[24 + 5 + 5 :] - ### check charging action - total_usage = np.array([]) - i = 0 - for charger in charging_hub.chargers: - associated_power = np.array([]) - for j in range(charger.number_of_connectors): - maximum_power = charger.power - if vehicle_state[i * 3] <= 0: - charging_hub.reward["feasibility"] += action[i + 1] - else: - associated_power = np.append(associated_power, action[i + 1]) - total_usage = np.append(total_usage, action[i + 1]) - i += 1 - surplus_per_charger = max(associated_power.sum() - maximum_power, 0) - charging_hub.reward["feasibility"] += surplus_per_charger - total_surplus = max( - total_usage.sum() - charging_hub.operator.free_grid_capa_actual[0], 0 - ) - charging_hub.reward["feasibility"] += total_surplus - - def checked_action(self, action, charging_hub): - vehicle_state = self.state[24 + 5 + 5 :] - ### check charging action - i = 0 - for charger in charging_hub.chargers: - lower_bound = i + 1 - for j in range(charger.number_of_connectors): - maximum_power = 
charger.power - if vehicle_state[i * 3] <= 0: - action[i + 1] = 0 - i += 1 - upper_bound = i + 1 - - while action[lower_bound:upper_bound].sum() > maximum_power: - number_active_chargers = len( - [f for f in action[lower_bound:upper_bound] if f > 0] - ) - surplus_per_charger = ( - max(action[lower_bound:upper_bound].sum() - maximum_power, 0) - / number_active_chargers - ) - action[lower_bound:upper_bound] -= surplus_per_charger - for c in range(len(action[lower_bound:upper_bound])): - action[lower_bound:upper_bound][c] = max( - action[lower_bound:upper_bound][c], 0 - ) - - storage_object = charging_hub.electric_storage - storage_object.SoC = min( - storage_object.SoC, storage_object.max_energy_stored_kWh - ) - storage_object.SoC = max(storage_object.SoC, 0) - if action[0] >= 0: - if ( - storage_object.SoC + action[0] / 60 * charging_hub.planning_interval - > storage_object.max_energy_stored_kWh - ): - action[0] = ( - storage_object.max_energy_stored_kWh - storage_object.SoC - ) / (60 * charging_hub.planning_interval) - action[0] = min(action[0], charging_hub.operator.free_grid_capa_actual[0]) - - # discharge rate cannot exceed SoC, and hub demand (i.e., no infeed) - if action[0] < 0: - if storage_object.SoC <= 0: - action[0] = 0 - elif ( - storage_object.SoC + (action[0] / 60 * charging_hub.planning_interval) - < 0 - ): - action[0] = -max( - (storage_object.SoC) / (60 * charging_hub.planning_interval), 0 - ) - while action.sum() - charging_hub.operator.free_grid_capa_actual[0] > 0: - number_active_chargers = len([a for a in action if a > 0]) - surplus_per_charger = ( - max(action.sum() - charging_hub.operator.free_grid_capa_actual[0], 0) - / number_active_chargers - ) - for i in range(1, len(action)): - action[i] = max(action[i] - surplus_per_charger, 0) - # if action[0]>0: - # action[0] = max(action[0] - surplus_per_charger, 0) - return action - def pick_action(self, eval_ep, charging_hub=None, state=None): + def pick_action(self, eval_ep, state=None): """Picks an action using one of three methods: 1) Randomly if we haven't passed a certain number of steps, 2) Using the actor in evaluation mode if eval_ep is True 3) Using the actor in training mode if eval_ep is False. 
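With the `charging_hub` argument dropped, `pick_action` now returns a raw action clipped to [-1, 1], and `rescale_action` / `descale_action` are plain affine maps between that range and the environment's action bounds (warm-up samples from the action space are descaled before being stored). A standalone round trip of those two maps; the price band used for `action_range` is illustrative only:

```python
import numpy as np

lo, hi = 0.10, 0.60  # illustrative action range, e.g. a price band in EUR/kWh


def rescale(a):
    """Map a policy output in [-1, 1] onto [lo, hi] (same affine form as SAC.rescale_action)."""
    return a * (hi - lo) / 2.0 + (hi + lo) / 2.0


def descale(a):
    """Inverse map, as applied to random warm-up samples before learning starts."""
    return (a - (hi + lo) / 2.0) / ((hi - lo) / 2.0)


raw = np.array([-1.0, 0.0, 1.0])
print(rescale(raw))           # [0.1  0.35 0.6 ]
print(descale(rescale(raw)))  # [-1.  0.  1.]
```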
The difference between evaluation and training mode is that training mode does more exploration @@ -376,13 +287,13 @@ def pick_action(self, eval_ep, charging_hub=None, state=None): # action[i] = max(action[i], -1) # action[i] = min(action[i], 1) # action = self.rescale_action(action) - # action = self.checked_action(action, charging_hub) + # action = self.checked_action(action) elif ( self.global_step_number < self.hyperparameters["min_steps_before_learning"] ): action = self.environment.action_space.sample().astype("float64") - action = self.descale_action(action, charging_hub) - # action = self.checked_action(action, charging_hub) + action = self.descale_action(action) + # action = self.checked_action(action) else: action = self.actor_pick_action(state=state) if self.add_extra_noise: @@ -392,7 +303,7 @@ def pick_action(self, eval_ep, charging_hub=None, state=None): action[i] = max(action[i], -1) action[i] = min(action[i], 1) # action = self.rescale_action(action) - # action = self.checked_action(action, charging_hub) + # action = self.checked_action(action) return action def actor_pick_action(self, state=None, eval=False): diff --git a/utilities/rl_agents/algorithm_agents.py b/utilities/rl_agents/algorithm_agents.py new file mode 100644 index 0000000..17155d7 --- /dev/null +++ b/utilities/rl_agents/algorithm_agents.py @@ -0,0 +1,682 @@ +from typing import Any, Dict, List, Optional +import numpy as np +from datetime import datetime + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent +) + +# Import existing algorithms +from simulation.operations.ChargingAlgorithms import ( + uncontrolled as charging_uncontrolled, + first_come_first_served, + earliest_deadline_first, + least_laxity_first, + equal_sharing, + online_myopic, + online_multi_period, + integrated_charging_storage +) + +from simulation.operations.RoutingAlgorithms import ( + random_charger_assignment, + lowest_occupancy_first_charger_assignment, + fill_one_after_other_charger_assignment, + lowest_utilization_first_charger_assignment, + matching_supply_demand_level, + assign_to_the_minimum_power +) + +from simulation.operations.StorageAlgorithms import ( + uncontrolled as storage_uncontrolled, + temporal_arbitrage, + peak_shaving +) + +from simulation.operations.IntegratedAlgorithms import ( + perfect_info_charging_routing, + perfect_info_charging_routing_storage +) + + +class AlgorithmChargingAgent(ChargingAgent): + """ + Agent that wraps existing charging algorithms. + + This agent provides a standardized interface to all the existing + charging algorithms in the codebase. 
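The algorithm agents defined below all share one pattern: the configured algorithm name is looked up in a dict of bound methods and unknown names raise a `ValueError`. A stripped-down, self-contained version of that dispatch (the class and its placeholder strategies are hypothetical; the real agents call into `ChargingAlgorithms` instead):

```python
from typing import Any, Callable, Dict, List


class MiniChargingDispatcher:
    """Stripped-down name -> method dispatch, mirroring the agents below."""

    def __init__(self, algorithm: str = "first_come_first_served"):
        self.algorithm = algorithm
        self.algorithm_functions: Dict[str, Callable[[List[Any]], List[float]]] = {
            "uncontrolled": self._uncontrolled,
            "first_come_first_served": self._fcfs,
        }

    def select_action(self, vehicles: List[Any]) -> List[float]:
        if self.algorithm not in self.algorithm_functions:
            raise ValueError(f"Unknown charging algorithm: {self.algorithm}")
        return self.algorithm_functions[self.algorithm](vehicles)

    # Placeholder strategies for illustration only.
    def _uncontrolled(self, vehicles):
        return [50.0 for _ in vehicles]

    def _fcfs(self, vehicles):
        return [50.0 if i == 0 else 0.0 for i, _ in enumerate(vehicles)]


print(MiniChargingDispatcher("first_come_first_served").select_action(["ev1", "ev2"]))
# [50.0, 0.0]
```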
+ """ + + def __init__(self, algorithm: str = "first_come_first_served"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.CHARGING + + # Algorithm mapping + self.algorithm_functions = { + "uncontrolled": self._uncontrolled_charging, + "first_come_first_served": self._first_come_first_served, + "earliest_deadline_first": self._earliest_deadline_first, + "least_laxity_first": self._least_laxity_first, + "equal_sharing": self._equal_sharing, + "online_myopic": self._online_myopic, + "online_multi_period": self._online_multi_period, + "integrated_storage": self._integrated_storage, + "perfect_info": self._perfect_info, + "perfect_info_with_storage": self._perfect_info_with_storage + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action using the specified algorithm. + + Args: + vehicles: List of vehicles requiring charging + context: Dictionary containing charging context + + Returns: + Dictionary containing charging decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown charging algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm + charging_actions = algo_func(vehicles, context) + + return { + "charging_actions": charging_actions, + "power_allocation": self.algorithm, + "priority_order": list(range(len(vehicles))), + "confidence": 0.9, + "strategy": self.algorithm, + "reasoning": f"Applied {self.algorithm} charging algorithm to {len(vehicles)} vehicles" + } + + def _uncontrolled_charging(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Uncontrolled charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + charging_uncontrolled( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _first_come_first_served(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """First come first served charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + first_come_first_served( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging 
actions + return [vehicle.charging_power for vehicle in vehicles] + + def _earliest_deadline_first(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Earliest deadline first charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + earliest_deadline_first( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _least_laxity_first(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Least laxity first charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + least_laxity_first( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _equal_sharing(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Equal sharing charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + charging_capacity = context.get("charging_capacity", 500) + free_grid_capacity = context.get("free_grid_capacity", 500) + planning_period_length = context.get("planning_period_length", 15) + + # Execute algorithm + equal_sharing( + env=env, + connected_vehicles=vehicles, + charging_stations=charging_stations, + charging_capacity=charging_capacity, + free_grid_capacity=free_grid_capacity, + planning_period_length=planning_period_length + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _online_myopic(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Online myopic charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + online_myopic( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _online_multi_period(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Online multi-period charging algorithm.""" + env = context.get("env") + charging_stations = context.get("charging_stations", []) + electricity_cost = 
context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + online_multi_period( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _integrated_storage(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Integrated charging and storage algorithm.""" + storage = context.get("storage") + charging_stations = context.get("charging_stations", []) + env = context.get("env") + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + peak_load_history = context.get("peak_load_history", []) + free_grid_capa_actual = context.get("free_grid_capa_actual", 500) + free_grid_capa_predicted = context.get("free_grid_capa_predicted", 500) + + # Execute algorithm + integrated_charging_storage( + storage=storage, + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + electricity_cost=electricity_cost, + sim_time=sim_time, + peak_load_history=peak_load_history, + free_grid_capa_actual=free_grid_capa_actual, + free_grid_capa_predicted=free_grid_capa_predicted + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _perfect_info(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Perfect information charging and routing algorithm.""" + charging_stations = context.get("charging_stations", []) + env = context.get("env") + grid_capacity = context.get("grid_capacity", 500) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + baseload = context.get("baseload", 100) + generation = context.get("generation") + service_level = context.get("service_level", 1) + time_range = context.get("time_range", 24) + + # Execute algorithm + perfect_info_charging_routing( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + grid_capacity=grid_capacity, + electricity_cost=electricity_cost, + sim_time=sim_time, + baseload=baseload, + generation=generation, + service_level=service_level, + time_range=time_range + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def _perfect_info_with_storage(self, vehicles: List[Any], context: Dict[str, Any]) -> List[float]: + """Perfect information charging, routing, and storage algorithm.""" + charging_stations = context.get("charging_stations", []) + env = context.get("env") + grid_capacity = context.get("grid_capacity", 500) + electricity_cost = context.get("electricity_cost", [0.15] * 24) + sim_time = context.get("sim_time", 1440) + baseload = context.get("baseload", 100) + storage = context.get("storage") + service_level = context.get("service_level", 1) + time_range = context.get("time_range", 24 * 5) + + # Execute algorithm + perfect_info_charging_routing_storage( + vehicles=vehicles, + charging_stations=charging_stations, + env=env, + grid_capacity=grid_capacity, + electricity_cost=electricity_cost, + sim_time=sim_time, + baseload=baseload, + storage=storage, 
+ service_level=service_level, + time_range=time_range + ) + + # Extract charging actions + return [vehicle.charging_power for vehicle in vehicles] + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass + + +class AlgorithmRoutingAgent(RoutingAgent): + """ + Agent that wraps existing routing algorithms. + + This agent provides a standardized interface to all the existing + routing algorithms in the codebase. + """ + + def __init__(self, algorithm: str = "lowest_occupancy_first"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.ROUTING + + # Algorithm mapping + self.algorithm_functions = { + "random": self._random_routing, + "lowest_occupancy_first": self._lowest_occupancy_first, + "fill_one_after_other": self._fill_one_after_other, + "lowest_utilization_first": self._lowest_utilization_first, + "matching_supply_demand": self._matching_supply_demand, + "minimum_power_requirement": self._minimum_power_requirement + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select routing action using the specified algorithm. + + Args: + vehicles: List of vehicles requiring routing + context: Dictionary containing routing context + + Returns: + Dictionary containing routing decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown routing algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm for each vehicle + routing_assignments = [] + for vehicle in vehicles: + assignment = algo_func(vehicle, context) + routing_assignments.append(assignment) + + return { + "routing_assignments": routing_assignments, + "queue_order": list(range(len(vehicles))), + "wait_times": [0] * len(vehicles), # Placeholder + "confidence": 0.9, + "strategy": self.algorithm, + "reasoning": f"Applied {self.algorithm} routing algorithm to {len(vehicles)} vehicles" + } + + def _random_routing(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Random routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return random_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _lowest_occupancy_first(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Lowest occupancy first routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return lowest_occupancy_first_charger_assignment( + 
charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _fill_one_after_other(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Fill one after other routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 24 * 60) + + return fill_one_after_other_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _lowest_utilization_first(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Lowest utilization first routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return lowest_utilization_first_charger_assignment( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _matching_supply_demand(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Matching supply demand routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return matching_supply_demand_level( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def _minimum_power_requirement(self, vehicle: Any, context: Dict[str, Any]) -> Any: + """Minimum power requirement routing algorithm.""" + charging_stations = context.get("charging_stations", []) + number_of_connectors = context.get("number_of_connectors", 1) + demand_threshold = context.get("demand_threshold", 1) + duration_threshold = context.get("duration_threshold", 100 * 60) + + return assign_to_the_minimum_power( + charging_stations=charging_stations, + number_of_connectors=number_of_connectors, + request=vehicle, + demand_threshold=demand_threshold, + duration_threshold=duration_threshold + ) + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass + + +class AlgorithmStorageAgent(StorageAgent): + """ + Agent that wraps existing storage algorithms. + + This agent provides a standardized interface to all the existing + storage algorithms in the codebase. 
+ """ + + def __init__(self, algorithm: str = "peak_shaving"): + self.algorithm = algorithm + self.state = None + self._agent_type = AgentType.HEURISTIC + self._decision_type = DecisionType.STORAGE + + # Algorithm mapping + self.algorithm_functions = { + "uncontrolled": self._uncontrolled_storage, + "temporal_arbitrage": self._temporal_arbitrage, + "peak_shaving": self._peak_shaving + } + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action using the specified algorithm. + + Args: + context: Dictionary containing storage context + + Returns: + Dictionary containing storage decision + """ + if self.algorithm not in self.algorithm_functions: + raise ValueError(f"Unknown storage algorithm: {self.algorithm}") + + # Get algorithm function + algo_func = self.algorithm_functions[self.algorithm] + + # Execute algorithm + storage_action = algo_func(context) + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": self.algorithm, + "confidence": 0.9, + "reasoning": f"Applied {self.algorithm} storage algorithm" + } + + def _uncontrolled_storage(self, context: Dict[str, Any]) -> float: + """Uncontrolled storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + + # Execute algorithm + storage_uncontrolled(env=env, storage_object=storage_object) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def _temporal_arbitrage(self, context: Dict[str, Any]) -> float: + """Temporal arbitrage storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + planning_interval = context.get("planning_interval", 15) + electricity_tariff = context.get("electricity_tariff", [0.15] * 24) + free_grid_capacity = context.get("free_grid_capacity", 500) + ev_charging_load = context.get("ev_charging_load", 100) + + # Execute algorithm + temporal_arbitrage( + env=env, + storage_object=storage_object, + planning_interval=planning_interval, + electricity_tariff=electricity_tariff, + free_grid_capacity=free_grid_capacity, + ev_charging_load=ev_charging_load + ) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def _peak_shaving(self, context: Dict[str, Any]) -> float: + """Peak shaving storage algorithm.""" + env = context.get("env") + storage_object = context.get("storage_object") + planning_interval = context.get("planning_interval", 15) + electricity_tariff = context.get("electricity_tariff", [0.15] * 24) + free_grid_capacity = context.get("free_grid_capacity", 500) + ev_charging_load = context.get("ev_charging_load", 100) + + # Execute algorithm + peak_shaving( + env=env, + storage_object=storage_object, + planning_interval=planning_interval, + electricity_tariff=electricity_tariff, + free_grid_capacity=free_grid_capacity, + 
ev_charging_load=ev_charging_load + ) + + # Extract storage action + if storage_object.charge_yn == 1: + return storage_object.charging_power + elif storage_object.discharge_yn == 1: + return -storage_object.discharging_power + else: + return 0.0 + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Algorithm-based agents don't learn from transitions.""" + pass diff --git a/utilities/rl_agents/factory.py b/utilities/rl_agents/factory.py new file mode 100644 index 0000000..4de1788 --- /dev/null +++ b/utilities/rl_agents/factory.py @@ -0,0 +1,196 @@ +from typing import Any, Optional +from simulation.operations.agents_controller import AgentsController +from utilities.rl_agents.adapters.pricing_adapter import PricingEnvAgentAdapter +from utilities.rl_agents.adapters.charging_adapter import ChargingEnvAgentAdapter +from utilities.rl_agents.adapters.storage_adapter import StorageEnvAgentAdapter +from utilities.rl_agents.adapters.gym_agent_adapter import ( + GymPricingAgentAdapter, GymChargingAgentAdapter, GymStorageAgentAdapter, + create_gym_pricing_adapter, create_gym_charging_adapter, create_gym_storage_adapter +) +from utilities.rl_environments.rl_pricing_env import PricingEnv + + +def build_pricing_adapter(config: Any, policy: Any, dqn: bool = False) -> PricingEnvAgentAdapter: + """ + Build a PricingEnv and wrap the given policy with the adapter. + + Args: + config: pricing env configuration object (must have number_power_options, maximum_power, evaluation) + policy: underlying RL policy with act(obs) and optional reset()/learn() + dqn: whether to initialize env in DQN discrete mode + + Returns: + PricingEnvAgentAdapter instance + """ + env = PricingEnv(config, DQN=dqn, charging_hub=None, env=None) + return PricingEnvAgentAdapter(env=env, policy=policy) + + +def build_charging_adapter(config: Any, policy: Any) -> ChargingEnvAgentAdapter: + """ + Build a charging environment and wrap the given policy with the adapter. + + Args: + config: charging env configuration object + policy: underlying RL policy with act(obs) and optional reset()/learn() + + Returns: + ChargingEnvAgentAdapter instance + """ + from utilities.rl_environments.SC_env import ChargingHubInvestmentEnv + env = ChargingHubInvestmentEnv(config, charging_hub=None, env=None) + return ChargingEnvAgentAdapter(rl_agent=policy, charging_env=env) + + +def build_storage_adapter(config: Any, policy: Any) -> StorageEnvAgentAdapter: + """ + Build a storage environment and wrap the given policy with the adapter. + + Args: + config: storage env configuration object + policy: underlying RL policy with act(obs) and optional reset()/learn() + + Returns: + StorageEnvAgentAdapter instance + """ + from utilities.rl_environments.SC_storage_env import StorageEnv + env = StorageEnv(config, charging_hub=None, env=None) + return StorageEnvAgentAdapter(rl_agent=policy, storage_env=env) + + +def build_agents_controller( + pricing_agent: Optional[Any] = None, + charging_agent: Optional[Any] = None, + storage_agent: Optional[Any] = None, + pricing_env: Optional[PricingEnv] = None, + pricing_config: Optional[Any] = None, + charging_config: Optional[Any] = None, + storage_config: Optional[Any] = None +) -> AgentsController: + """ + Build an AgentsController with the specified agents. 
+ + Args: + pricing_agent: RL agent for pricing (e.g., SAC, DQN) + charging_agent: RL agent for charging (e.g., SAC, DQN) + storage_agent: RL agent for storage (e.g., SAC, DQN) + pricing_env: Pricing environment (optional, will be created if not provided) + pricing_config: Configuration for pricing environment + charging_config: Configuration for charging environment + storage_config: Configuration for storage environment + + Returns: + AgentsController instance + """ + pricing_adapter = None + charging_adapter = None + storage_adapter = None + + if pricing_agent and pricing_config: + if not pricing_env: + pricing_env = PricingEnv(pricing_config, charging_hub=None, env=None) + pricing_adapter = PricingEnvAgentAdapter(rl_agent=pricing_agent, pricing_env=pricing_env) + + if charging_agent and charging_config: + charging_adapter = build_charging_adapter(charging_config, charging_agent) + + if storage_agent and storage_config: + storage_adapter = build_storage_adapter(storage_config, storage_agent) + + return AgentsController( + pricing=pricing_adapter, + charging=charging_adapter, + storage=storage_adapter + ) + + +# Gym-compatible agent factory functions +def build_gym_pricing_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymPricingAgentAdapter: + """ + Build a gym-compatible pricing agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymPricingAgentAdapter instance + """ + return create_gym_pricing_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_charging_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymChargingAgentAdapter: + """ + Build a gym-compatible charging agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymChargingAgentAdapter instance + """ + return create_gym_charging_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_storage_adapter(config_dict: Dict[str, Any], gym_agent: Any, **kwargs) -> GymStorageAgentAdapter: + """ + Build a gym-compatible storage agent adapter. + + Args: + config_dict: Configuration dictionary for the environment + gym_agent: Gym-compatible RL agent (e.g., Stable Baselines3 agent) + **kwargs: Additional arguments for environment creation + + Returns: + GymStorageAgentAdapter instance + """ + return create_gym_storage_adapter(config_dict, gym_agent, **kwargs) + + +def build_gym_agents_controller( + pricing_agent: Optional[Any] = None, + charging_agent: Optional[Any] = None, + storage_agent: Optional[Any] = None, + pricing_config: Optional[Dict[str, Any]] = None, + charging_config: Optional[Dict[str, Any]] = None, + storage_config: Optional[Dict[str, Any]] = None, + **kwargs +) -> AgentsController: + """ + Build an AgentsController with gym-compatible agents. 
+ + Args: + pricing_agent: Gym-compatible RL agent for pricing + charging_agent: Gym-compatible RL agent for charging + storage_agent: Gym-compatible RL agent for storage + pricing_config: Configuration for pricing environment + charging_config: Configuration for charging environment + storage_config: Configuration for storage environment + **kwargs: Additional arguments for environment creation + + Returns: + AgentsController instance with gym-compatible agents + """ + pricing_adapter = None + charging_adapter = None + storage_adapter = None + + if pricing_agent and pricing_config: + pricing_adapter = build_gym_pricing_adapter(pricing_config, pricing_agent, **kwargs) + + if charging_agent and charging_config: + charging_adapter = build_gym_charging_adapter(charging_config, charging_agent, **kwargs) + + if storage_agent and storage_config: + storage_adapter = build_gym_storage_adapter(storage_config, storage_agent, **kwargs) + + return AgentsController( + pricing=pricing_adapter, + charging=charging_adapter, + storage=storage_adapter + ) + + diff --git a/utilities/rl_agents/interfaces.py b/utilities/rl_agents/interfaces.py new file mode 100644 index 0000000..12706b0 --- /dev/null +++ b/utilities/rl_agents/interfaces.py @@ -0,0 +1,317 @@ +from typing import Protocol, Any, Dict, List, Optional, Union +from enum import Enum + + +class DecisionType(Enum): + """Types of decisions that can be made by agents""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + ROUTING = "routing" + VEHICLE_ASSIGNMENT = "vehicle_assignment" + PARKING_ALLOCATION = "parking_allocation" + GRID_MANAGEMENT = "grid_management" + DEMAND_FORECASTING = "demand_forecasting" + + +class AgentType(Enum): + """Types of agents that can make decisions""" + RL_SAC = "rl_sac" + RL_DQN = "rl_dqn" + RL_DDPG = "rl_ddpg" + RULE_BASED = "rule_based" + HEURISTIC = "heuristic" + OPTIMIZATION = "optimization" + ML_MODEL = "ml_model" + + +class BaseAgent(Protocol): + """ + Base interface for all decision-making agents. + + This protocol defines the standard interface that all agents must implement, + regardless of whether they are RL agents, rule-based agents, or other types. + """ + + @property + def agent_type(self) -> AgentType: ... + + @property + def decision_type(self) -> DecisionType: ... + + def reset(self) -> None: ... + + def update_state(self, context: Dict[str, Any]) -> None: ... + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: ... + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: ... + + def get_state(self) -> Any: ... + + def set_state(self, state: Any) -> None: ... + + +class PricingAgent(BaseAgent): + """ + Interface for pricing decision agents. + + Pricing agents make decisions about: + - Energy prices + - Parking fees + - Dynamic pricing strategies + - Price optimization + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.PRICING + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select pricing action based on current context. 
+ + Args: + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - pricing_mode: "Discrete", "Continuous", "ToU" + - charging_hub: Current charging hub state + - env: Simulation environment + - current_demand: Current energy demand + - grid_capacity: Available grid capacity + + Returns: + Dictionary containing: + - pricing_parameters: List of pricing parameters + - energy_price: Energy price per kWh + - parking_fee: Parking fee per hour + - confidence: Confidence in the decision (0-1) + """ + ... + + +class ChargingAgent(BaseAgent): + """ + Interface for charging decision agents. + + Charging agents make decisions about: + - Charging power allocation + - Charging schedules + - Priority assignment + - Load balancing + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.CHARGING + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on vehicles and context. + + Args: + vehicles: List of vehicles requiring charging decisions + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - available_power: Available charging power + - grid_constraints: Grid capacity constraints + + Returns: + Dictionary containing: + - charging_actions: List of charging actions per vehicle + - power_allocation: Power allocation strategy + - priority_order: Vehicle priority ordering + - confidence: Confidence in the decision (0-1) + """ + ... + + +class StorageAgent(BaseAgent): + """ + Interface for storage decision agents. + + Storage agents make decisions about: + - Energy storage charging/discharging + - Storage scheduling + - Peak shaving strategies + - Grid support operations + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.STORAGE + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action based on current context. + + Args: + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - storage_soc: Current state of charge + - grid_demand: Current grid demand + - pv_generation: Current PV generation + + Returns: + Dictionary containing: + - storage_action: Charging/discharging action + - power_level: Power level for storage operation + - strategy: Storage strategy (peak_shaving, arbitrage, etc.) + - confidence: Confidence in the decision (0-1) + """ + ... + + +class RoutingAgent(BaseAgent): + """ + Interface for routing decision agents. + + Routing agents make decisions about: + - Vehicle routing to charging stations + - Parking space allocation + - Queue management + - Resource assignment + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.ROUTING + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select routing action based on vehicles and context. 
+ + Args: + vehicles: List of vehicles requiring routing decisions + context: Dictionary containing: + - eval_ep: Whether this is evaluation mode + - charging_hub: Current charging hub state + - env: Simulation environment + - available_spaces: Available parking/charging spaces + - queue_status: Current queue status + + Returns: + Dictionary containing: + - routing_assignments: Vehicle to space assignments + - queue_order: Queue ordering + - wait_times: Estimated wait times + - confidence: Confidence in the decision (0-1) + """ + ... + + +class VehicleAssignmentAgent(BaseAgent): + """ + Interface for vehicle assignment decision agents. + + Vehicle assignment agents make decisions about: + - Which charging station to assign vehicles to + - Charging connector allocation + - Priority-based assignments + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.VEHICLE_ASSIGNMENT + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select vehicle assignment action. + + Args: + vehicles: List of vehicles to assign + context: Dictionary containing assignment context + + Returns: + Dictionary containing assignment decisions + """ + ... + + +class ParkingAllocationAgent(BaseAgent): + """ + Interface for parking allocation decision agents. + + Parking allocation agents make decisions about: + - Parking space allocation + - Parking duration optimization + - Space utilization + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.PARKING_ALLOCATION + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select parking allocation action. + + Args: + vehicles: List of vehicles requiring parking + context: Dictionary containing parking context + + Returns: + Dictionary containing parking allocation decisions + """ + ... + + +class GridManagementAgent(BaseAgent): + """ + Interface for grid management decision agents. + + Grid management agents make decisions about: + - Grid capacity management + - Load balancing + - Grid stability + - Peak demand management + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.GRID_MANAGEMENT + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select grid management action. + + Args: + context: Dictionary containing grid management context + + Returns: + Dictionary containing grid management decisions + """ + ... + + +class DemandForecastingAgent(BaseAgent): + """ + Interface for demand forecasting agents. + + Demand forecasting agents make decisions about: + - Energy demand prediction + - Load forecasting + - Demand patterns analysis + """ + + @property + def decision_type(self) -> DecisionType: + return DecisionType.DEMAND_FORECASTING + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select demand forecasting action. + + Args: + context: Dictionary containing forecasting context + + Returns: + Dictionary containing demand forecasts + """ + ... 
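To make the intended use of these Protocol interfaces concrete, here is a minimal sketch (not part of the patch itself) of a custom agent that satisfies the PricingAgent interface defined above. The class name FlatRatePricingAgent, its default prices, and the confidence value are illustrative assumptions; the method set simply mirrors the RuleBasedPricingAgent added later in this diff.

from typing import Any, Dict, Optional

from utilities.rl_agents.interfaces import AgentType, DecisionType, PricingAgent


class FlatRatePricingAgent(PricingAgent):
    """Hypothetical example: always quotes a fixed energy price and parking fee."""

    def __init__(self, energy_price: float = 0.15, parking_fee: float = 2.0):
        self.energy_price = energy_price
        self.parking_fee = parking_fee
        self.state = None

    @property
    def agent_type(self) -> AgentType:
        return AgentType.RULE_BASED

    @property
    def decision_type(self) -> DecisionType:
        return DecisionType.PRICING

    def reset(self) -> None:
        self.state = None

    def update_state(self, context: Dict[str, Any]) -> None:
        self.state = context

    def get_state(self) -> Any:
        return self.state

    def set_state(self, state: Any) -> None:
        self.state = state

    def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]:
        # Return the same dictionary shape documented in PricingAgent.select_action.
        return {
            "pricing_parameters": [self.energy_price, self.parking_fee],
            "energy_price": self.energy_price,
            "parking_fee": self.parking_fee,
            "confidence": 1.0,
        }

    def learn(self, transition: Optional[Dict[str, Any]] = None) -> None:
        # Rule-based agents do not learn from transitions.
        pass

This is the same pattern the AlgorithmRoutingAgent, AlgorithmStorageAgent, and the rule-based agents introduced in this patch follow: implement the protocol methods, and the rest of the codebase interacts with the agent only through select_action and the agent_type/decision_type properties.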
+ + diff --git a/utilities/rl_agents/rule_based_agents.py b/utilities/rl_agents/rule_based_agents.py new file mode 100644 index 0000000..7ff1d6c --- /dev/null +++ b/utilities/rl_agents/rule_based_agents.py @@ -0,0 +1,479 @@ +from typing import Any, Dict, List, Optional +import numpy as np +from datetime import datetime + +from utilities.rl_agents.interfaces import ( + BaseAgent, + DecisionType, + AgentType, + PricingAgent, + ChargingAgent, + StorageAgent, + RoutingAgent, + VehicleAssignmentAgent, + ParkingAllocationAgent, + GridManagementAgent, + DemandForecastingAgent +) + + +class RuleBasedPricingAgent(PricingAgent): + """ + Rule-based pricing agent that implements simple pricing strategies. + + This agent demonstrates how rule-based agents can be used alongside RL agents. + It implements common pricing strategies like time-of-use, demand-based, and + cost-plus pricing. + """ + + def __init__(self, strategy: str = "time_of_use"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.PRICING + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + """Reset the agent state.""" + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + """Update agent state based on context.""" + self.state = context + + def get_state(self) -> Any: + """Get current agent state.""" + return self.state + + def set_state(self, state: Any) -> None: + """Set agent state.""" + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select pricing action based on rule-based strategy. + + Args: + context: Dictionary containing pricing context + + Returns: + Dictionary containing pricing decision + """ + if self.strategy == "time_of_use": + return self._time_of_use_pricing(context) + elif self.strategy == "demand_based": + return self._demand_based_pricing(context) + elif self.strategy == "cost_plus": + return self._cost_plus_pricing(context) + else: + return self._default_pricing(context) + + def _time_of_use_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Time-of-use pricing strategy.""" + env = context.get("env") + current_hour = int((env.now % 1440) / 60) if env else 12 + + # Peak hours: 8-10 AM and 6-8 PM + if current_hour in [8, 9, 18, 19]: + energy_price = 0.25 # High price during peak + elif current_hour in [10, 11, 12, 13, 14, 15, 16, 17]: + energy_price = 0.15 # Medium price during day + else: + energy_price = 0.10 # Low price during off-peak + + parking_fee = 2.0 # Fixed parking fee + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.9, + "strategy": "time_of_use", + "reasoning": f"Peak hour pricing applied for hour {current_hour}" + } + + def _demand_based_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Demand-based pricing strategy.""" + charging_hub = context.get("charging_hub") + current_demand = charging_hub.grid.current_load if charging_hub else 100 + max_capacity = charging_hub.grid.capacity if charging_hub else 500 + + # Calculate demand ratio + demand_ratio = current_demand / max_capacity if max_capacity > 0 else 0.2 + + # Base price with demand multiplier + base_price = 0.15 + if demand_ratio > 0.8: + energy_price = base_price * 1.5 # High demand + elif demand_ratio > 0.6: + energy_price = base_price * 
1.2 # Medium demand + else: + energy_price = base_price # Low demand + + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.85, + "strategy": "demand_based", + "reasoning": f"Demand ratio {demand_ratio:.2f} applied" + } + + def _cost_plus_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Cost-plus pricing strategy.""" + # Assume base electricity cost + base_cost = 0.12 + markup = 0.25 # 25% markup + + energy_price = base_cost * (1 + markup) + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.95, + "strategy": "cost_plus", + "reasoning": f"Cost-plus pricing with {markup*100}% markup" + } + + def _default_pricing(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Default pricing strategy.""" + energy_price = 0.15 + parking_fee = 2.0 + + return { + "pricing_parameters": [energy_price, parking_fee], + "energy_price": energy_price, + "parking_fee": parking_fee, + "confidence": 0.8, + "strategy": "default", + "reasoning": "Default pricing applied" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass + + +class RuleBasedChargingAgent(ChargingAgent): + """ + Rule-based charging agent that implements simple charging strategies. + + This agent implements strategies like first-come-first-served, priority-based, + and load-balancing charging. + """ + + def __init__(self, strategy: str = "first_come_first_served"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.CHARGING + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select charging action based on rule-based strategy. 
+ + Args: + vehicles: List of vehicles requiring charging + context: Dictionary containing charging context + + Returns: + Dictionary containing charging decision + """ + if self.strategy == "first_come_first_served": + return self._first_come_first_served(vehicles, context) + elif self.strategy == "priority_based": + return self._priority_based(vehicles, context) + elif self.strategy == "load_balancing": + return self._load_balancing(vehicles, context) + else: + return self._default_charging(vehicles, context) + + def _first_come_first_served(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """First-come-first-served charging strategy.""" + charging_actions = [] + priority_order = [] + + # Sort vehicles by arrival time + sorted_vehicles = sorted(vehicles, key=lambda v: v.arrival_period) + + for i, vehicle in enumerate(sorted_vehicles): + # Assign equal power to all vehicles + charging_power = 22.0 # Default charging power + charging_actions.append(charging_power) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "equal", + "priority_order": priority_order, + "confidence": 0.9, + "strategy": "first_come_first_served", + "reasoning": f"FCFS strategy applied to {len(vehicles)} vehicles" + } + + def _priority_based(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Priority-based charging strategy.""" + charging_actions = [] + priority_order = [] + + # Sort vehicles by priority (energy deficit, departure time, etc.) + def priority_key(vehicle): + energy_deficit = vehicle.remaining_energy_deficit + time_until_departure = vehicle.departure_period - context.get("env", {}).now + return (energy_deficit, -time_until_departure) # Higher deficit and earlier departure = higher priority + + sorted_vehicles = sorted(vehicles, key=priority_key, reverse=True) + + for i, vehicle in enumerate(sorted_vehicles): + # Higher priority vehicles get more power + if i < len(vehicles) // 3: + charging_power = 50.0 # High priority + elif i < 2 * len(vehicles) // 3: + charging_power = 22.0 # Medium priority + else: + charging_power = 11.0 # Low priority + + charging_actions.append(charging_power) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "priority_based", + "priority_order": priority_order, + "confidence": 0.85, + "strategy": "priority_based", + "reasoning": f"Priority-based strategy applied to {len(vehicles)} vehicles" + } + + def _load_balancing(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Load balancing charging strategy.""" + charging_actions = [] + priority_order = [] + + # Calculate total available power + charging_hub = context.get("charging_hub") + available_power = charging_hub.grid.capacity if charging_hub else 500 + + # Distribute power evenly among vehicles + power_per_vehicle = available_power / len(vehicles) if vehicles else 0 + + for i, vehicle in enumerate(vehicles): + charging_actions.append(power_per_vehicle) + priority_order.append(i) + + return { + "charging_actions": charging_actions, + "power_allocation": "load_balanced", + "priority_order": priority_order, + "confidence": 0.8, + "strategy": "load_balancing", + "reasoning": f"Load balancing with {power_per_vehicle:.1f} kW per vehicle" + } + + def _default_charging(self, vehicles: List[Any], context: Dict[str, Any]) -> Dict[str, Any]: + """Default charging strategy.""" + charging_actions = [22.0] * len(vehicles) # Default power for all vehicles + 
priority_order = list(range(len(vehicles))) + + return { + "charging_actions": charging_actions, + "power_allocation": "default", + "priority_order": priority_order, + "confidence": 0.7, + "strategy": "default", + "reasoning": f"Default charging strategy applied to {len(vehicles)} vehicles" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass + + +class RuleBasedStorageAgent(StorageAgent): + """ + Rule-based storage agent that implements simple storage strategies. + + This agent implements strategies like peak shaving, arbitrage, and + grid support operations. + """ + + def __init__(self, strategy: str = "peak_shaving"): + self.strategy = strategy + self.state = None + self._agent_type = AgentType.RULE_BASED + self._decision_type = DecisionType.STORAGE + + @property + def agent_type(self) -> AgentType: + return self._agent_type + + @property + def decision_type(self) -> DecisionType: + return self._decision_type + + def reset(self) -> None: + self.state = None + + def update_state(self, context: Dict[str, Any]) -> None: + self.state = context + + def get_state(self) -> Any: + return self.state + + def set_state(self, state: Any) -> None: + self.state = state + + def select_action(self, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Select storage action based on rule-based strategy. + + Args: + context: Dictionary containing storage context + + Returns: + Dictionary containing storage decision + """ + if self.strategy == "peak_shaving": + return self._peak_shaving(context) + elif self.strategy == "arbitrage": + return self._arbitrage(context) + elif self.strategy == "grid_support": + return self._grid_support(context) + else: + return self._default_storage(context) + + def _peak_shaving(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Peak shaving storage strategy.""" + charging_hub = context.get("charging_hub") + current_load = charging_hub.grid.current_load if charging_hub else 100 + max_capacity = charging_hub.grid.capacity if charging_hub else 500 + storage_soc = charging_hub.electric_storage.soc if charging_hub else 0.5 + + # Discharge if load is high and storage has capacity + if current_load > max_capacity * 0.8 and storage_soc > 0.2: + storage_action = -50.0 # Discharge + strategy = "peak_shaving_discharge" + elif current_load < max_capacity * 0.4 and storage_soc < 0.8: + storage_action = 30.0 # Charge + strategy = "peak_shaving_charge" + else: + storage_action = 0.0 # No action + strategy = "peak_shaving_idle" + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.85, + "reasoning": f"Peak shaving: load={current_load:.1f}, soc={storage_soc:.2f}" + } + + def _arbitrage(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Arbitrage storage strategy.""" + env = context.get("env") + current_hour = int((env.now % 1440) / 60) if env else 12 + storage_soc = context.get("storage_soc", 0.5) + + # Charge during low-price hours (night), discharge during high-price hours (day) + if 22 <= current_hour or current_hour <= 6: # Night hours + if storage_soc < 0.9: + storage_action = 40.0 # Charge + strategy = "arbitrage_charge" + else: + storage_action = 0.0 # Full + strategy = "arbitrage_full" + else: # Day hours + if storage_soc > 0.1: + storage_action = -40.0 # Discharge + strategy = "arbitrage_discharge" + else: + storage_action = 0.0 # Empty + strategy = "arbitrage_empty" + + return { + "storage_action": storage_action, 
+ "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.8, + "reasoning": f"Arbitrage: hour={current_hour}, soc={storage_soc:.2f}" + } + + def _grid_support(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Grid support storage strategy.""" + charging_hub = context.get("charging_hub") + grid_frequency = getattr(charging_hub.grid, 'frequency', 50.0) if charging_hub else 50.0 + storage_soc = charging_hub.electric_storage.soc if charging_hub else 0.5 + + # Support grid frequency + if grid_frequency < 49.8: # Low frequency + if storage_soc > 0.1: + storage_action = -30.0 # Discharge to support + strategy = "grid_support_discharge" + else: + storage_action = 0.0 + strategy = "grid_support_empty" + elif grid_frequency > 50.2: # High frequency + if storage_soc < 0.9: + storage_action = 30.0 # Charge to absorb + strategy = "grid_support_charge" + else: + storage_action = 0.0 + strategy = "grid_support_full" + else: # Normal frequency + storage_action = 0.0 + strategy = "grid_support_idle" + + return { + "storage_action": storage_action, + "power_level": abs(storage_action), + "strategy": strategy, + "confidence": 0.9, + "reasoning": f"Grid support: frequency={grid_frequency:.1f}, soc={storage_soc:.2f}" + } + + def _default_storage(self, context: Dict[str, Any]) -> Dict[str, Any]: + """Default storage strategy.""" + storage_action = 0.0 + + return { + "storage_action": storage_action, + "power_level": 0.0, + "strategy": "default", + "confidence": 0.7, + "reasoning": "Default storage strategy applied" + } + + def learn(self, transition: Optional[Dict[str, Any]] = None) -> None: + """Rule-based agents don't learn from transitions.""" + pass diff --git a/utilities/rl_environments/SC_env.py b/utilities/rl_environments/SC_env.py index 5f0e36a..695b458 100644 --- a/utilities/rl_environments/SC_env.py +++ b/utilities/rl_environments/SC_env.py @@ -10,7 +10,7 @@ class ChargingHubInvestmentEnv(gym.Env): reward_range = (-float("inf"), float("inf")) spec = None - def __init__(self, config): + def __init__(self, config, charging_hub=None, env=None): # Set these in ALL subclasses self.action_space = spaces.Box( low=0, @@ -26,8 +26,8 @@ def __init__(self, config): shape=(config.number_chargers * 3 + 2 + 5,), dtype=np.float64, ) - self.charging_hub = None - self.env = None + self.charging_hub = charging_hub + self.env = env self.id = 1 self.episode = 0 # vehicles_to_decide = [vehicle for vehicle in self.fleet.vehicles if vehicle.mode in ['idle','parking','circling']][0:10] @@ -129,7 +129,7 @@ def get_state(self, charging_hub=None, env=None): state = np.append(state, charger_state) return state - def step(self, action, charging_hub=None, env=None): + def step(self, action): # Execute one time step within the environment # the first action is charging/discharging of the battery # storage_power = action[0] @@ -144,9 +144,9 @@ def step(self, action, charging_hub=None, env=None): # if len(charging_vehicles) > 0: # charging_vehicles[0].charging_power = action[i+1] self.current_step += 1 - reward = self._take_action(action, charging_hub, env) + reward = self._take_action(action) done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + obs = self._next_observation() return obs, reward, done, {} def receive_action(self): @@ -165,28 +165,130 @@ def reset(self): def render(self, mode="human", close=False): print(self.reward) - def _take_action(self, action, charging_hub, env): + def _take_action(self, action): reward = 0 penalty_ratio = 0.001 - reward -= 
charging_hub.reward["missed"] - reward -= charging_hub.reward["feasibility"] * penalty_ratio - # reward -= charging_hub.reward['feasibility_storage'] * penalty_ratio + reward -= self.charging_hub.reward["missed"] + reward -= self.charging_hub.reward["feasibility"] * penalty_ratio + # reward -= self.charging_hub.reward['feasibility_storage'] * penalty_ratio - self.total_reward["missed"] -= charging_hub.reward["missed"] + self.total_reward["missed"] -= self.charging_hub.reward["missed"] # print(f'charging:{self.total_reward["missed"]}') self.total_reward["feasibility"] -= ( - charging_hub.reward["feasibility"] * penalty_ratio + self.charging_hub.reward["feasibility"] * penalty_ratio ) - # self.total_reward['feasibility_storage'] -= charging_hub.reward['feasibility_storage'] * penalty_ratio - self.total_reward["energy"] -= charging_hub.grid.energy_rewards * 0 + # self.total_reward['feasibility_storage'] -= self.charging_hub.reward['feasibility_storage'] * penalty_ratio + self.total_reward["energy"] -= self.charging_hub.grid.energy_rewards * 0 - if not charging_hub.dynamic_pricing: - charging_hub.reward["missed"] = 0 - charging_hub.reward["feasibility_storage"] = 0 - charging_hub.reward["feasibility"] = 0 + if not self.charging_hub.dynamic_pricing: + self.charging_hub.reward["missed"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 return reward / 100 - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + def _next_observation(self): + return self.get_state(self.charging_hub, self.env) + + def penalty_action(self, action): + """ + Calculate feasibility penalties for charging actions. + This method was moved from SAC.py to keep simulation logic in the environment. + """ + if not self.charging_hub: + return + + vehicle_state = self.state[24 + 5 + 5 :] if hasattr(self, 'state') else [] + ### check charging action + total_usage = np.array([]) + i = 0 + for charger in self.charging_hub.chargers: + associated_power = np.array([]) + for j in range(charger.number_of_connectors): + maximum_power = charger.power + if vehicle_state[i * 3] <= 0: + self.charging_hub.reward["feasibility"] += action[i + 1] + else: + associated_power = np.append(associated_power, action[i + 1]) + total_usage = np.append(total_usage, action[i + 1]) + i += 1 + surplus_per_charger = max(associated_power.sum() - maximum_power, 0) + self.charging_hub.reward["feasibility"] += surplus_per_charger + total_surplus = max( + total_usage.sum() - self.charging_hub.operator.free_grid_capa_actual[0], 0 + ) + self.charging_hub.reward["feasibility"] += total_surplus + + def checked_action(self, action): + """ + Check and adjust charging actions for feasibility. + This method was moved from SAC.py to keep simulation logic in the environment. 
+ """ + if not self.charging_hub: + return action + + vehicle_state = self.state[24 + 5 + 5 :] if hasattr(self, 'state') else [] + ### check charging action + i = 0 + for charger in self.charging_hub.chargers: + lower_bound = i + 1 + for j in range(charger.number_of_connectors): + maximum_power = charger.power + if vehicle_state[i * 3] <= 0: + action[i + 1] = 0 + i += 1 + upper_bound = i + 1 + + while action[lower_bound:upper_bound].sum() > maximum_power: + number_active_chargers = len( + [f for f in action[lower_bound:upper_bound] if f > 0] + ) + surplus_per_charger = ( + max(action[lower_bound:upper_bound].sum() - maximum_power, 0) + / number_active_chargers + ) + action[lower_bound:upper_bound] -= surplus_per_charger + for c in range(len(action[lower_bound:upper_bound])): + action[lower_bound:upper_bound][c] = max( + action[lower_bound:upper_bound][c], 0 + ) + + storage_object = self.charging_hub.electric_storage + storage_object.SoC = min( + storage_object.SoC, storage_object.max_energy_stored_kWh + ) + storage_object.SoC = max(storage_object.SoC, 0) + if action[0] >= 0: + if ( + storage_object.SoC + action[0] / 60 * self.charging_hub.planning_interval + > storage_object.max_energy_stored_kWh + ): + action[0] = ( + storage_object.max_energy_stored_kWh - storage_object.SoC + ) / (60 * self.charging_hub.planning_interval) + action[0] = min(action[0], self.charging_hub.operator.free_grid_capa_actual[0]) + + # discharge rate cannot exceed SoC, and hub demand (i.e., no infeed) + if action[0] < 0: + if storage_object.SoC <= 0: + action[0] = 0 + elif ( + storage_object.SoC + (action[0] / 60 * self.charging_hub.planning_interval) + < 0 + ): + action[0] = -max( + (storage_object.SoC) / (60 * self.charging_hub.planning_interval), 0 + ) + + while action.sum() - self.charging_hub.operator.free_grid_capa_actual[0] > 0: + number_active_chargers = len([a for a in action if a > 0]) + surplus_per_charger = ( + max(action.sum() - self.charging_hub.operator.free_grid_capa_actual[0], 0) + / number_active_chargers + ) + for i in range(1, len(action)): + action[i] = max(action[i] - surplus_per_charger, 0) + # if action[0]>0: + # action[0] = max(action[0] - surplus_per_charger, 0) + return action diff --git a/utilities/rl_environments/SC_storage_env.py b/utilities/rl_environments/SC_storage_env.py index d007ae6..be34420 100644 --- a/utilities/rl_environments/SC_storage_env.py +++ b/utilities/rl_environments/SC_storage_env.py @@ -10,7 +10,7 @@ class StorageEnv(gym.Env): reward_range = (-float("inf"), float("inf")) spec = None - def __init__(self, config): + def __init__(self, config, charging_hub=None, env=None): # Set these in ALL subclasses self.action_space = spaces.Box(low=250, high=800, shape=(1,), dtype=np.float64) self.observation_space = spaces.Box( @@ -19,8 +19,8 @@ def __init__(self, config): shape=(config.number_chargers * 3 + 24 + 5 + 5,), dtype=np.float64, ) - self.charging_hub = None - self.env = None + self.charging_hub = charging_hub + self.env = env self.id = 1 self.episode = 0 # vehicles_to_decide = [vehicle for vehicle in self.fleet.vehicles if vehicle.mode in ['idle','parking','circling']][0:10] @@ -104,7 +104,7 @@ def get_state(self, charging_hub=None, env=None): # print(len(state)) return state - def step(self, action, charging_hub=None, env=None): + def step(self, action): # Execute one time step within the environment # the first action is charging/discharging of the battery # storage_power = action[0] @@ -119,9 +119,9 @@ def step(self, action, charging_hub=None, env=None): # if 
len(charging_vehicles) > 0: # charging_vehicles[0].charging_power = action[i+1] self.current_step += 1 - reward = self._take_action(action, charging_hub, env) + reward = self._take_action(action) done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + obs = self._next_observation() return obs, reward, done, {} def receive_action(self): @@ -140,21 +140,21 @@ def reset(self): def render(self, mode="human", close=False): print(self.reward) - def _take_action(self, action, charging_hub, env): + def _take_action(self, action): # # state = state.reshape((1, self._state_size)) # lg.info(f'old_state={fleet.old_state}, old_action={fleet.old_action}') # lg.info(f'new_action={action}, new_state={state}, {fleet.charging_count}') reward = 0 - reward -= charging_hub.reward["missed"] + reward -= self.charging_hub.reward["missed"] - charging_hub.reward["missed"] = 0 + self.charging_hub.reward["missed"] = 0 ### TODO add the energy rewards to reward["costs"] # charging_hub.grid.energy_rewards = 0 - charging_hub.reward["feasibility"] = 0 - charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 return reward - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + def _next_observation(self): + return self.get_state(self.charging_hub, self.env) diff --git a/utilities/rl_environments/evch_gym_env.py b/utilities/rl_environments/evch_gym_env.py new file mode 100644 index 0000000..9840ea4 --- /dev/null +++ b/utilities/rl_environments/evch_gym_env.py @@ -0,0 +1,670 @@ +import gym +from gym import spaces +import numpy as np +from typing import Dict, Any, Optional, Tuple, Union +from dataclasses import dataclass +from enum import Enum + +from utilities.rl_environments.rl_pricing_env import PricingEnv +from utilities.rl_environments.SC_env import ChargingHubInvestmentEnv +from utilities.rl_environments.SC_storage_env import StorageEnv + + +class AgentType(Enum): + """Enumeration of available agent types.""" + PRICING = "pricing" + CHARGING = "charging" + STORAGE = "storage" + + +@dataclass +class EVCHConfig: + """Configuration for the EVCH gym environment.""" + agent_type: AgentType + number_chargers: int + number_power_options: int + maximum_power: float + maximum_grid_usage: float + evaluation: bool = False + pricing_mode: str = "Continuous" + dynamic_fix_term_pricing: bool = False + capacity_pricing: bool = False + dynamic_parking_fee: bool = False + limiting_grid_capa: bool = False + dynamic_storage_scheduling: bool = False + + + +class EVCHGymEnv(gym.Env): + """ + Unified gym environment for EV Charging Hub operations. + + This environment completely decouples RL agents from the simulation, + providing a standard gym interface that can be used with any gym-compatible + RL library (Stable Baselines3, RLlib, etc.). + + The environment can be configured for different agent types: + - PRICING: Dynamic pricing decisions + - CHARGING: Charging optimization decisions + - STORAGE: Energy storage management decisions + """ + + metadata = {"render_modes": ["human"], "render_fps": 4} + + def __init__(self, config: EVCHConfig, charging_hub: Optional[Any] = None, sim_env: Optional[Any] = None): + """ + Initialize the EVCH gym environment. 
+ + Args: + config: Configuration object specifying agent type and parameters + charging_hub: Reference to the charging hub (will be set later if None) + sim_env: Reference to the simulation environment (will be set later if None) + """ + super().__init__() + + self.config = config + self.agent_type = config.agent_type + self.charging_hub = charging_hub + self.sim_env = sim_env + + # Initialize the appropriate underlying environment + self._init_underlying_env() + + # Set observation and action spaces + self._set_spaces() + + # State tracking + self.current_state = None + self.current_action = None + self.current_reward = 0.0 + self.done = False + self.info = {} + + # Episode tracking + self.episode_step = 0 + self.max_episode_steps = 1000 # Configurable + + def _init_underlying_env(self): + """Initialize the underlying environment based on agent type.""" + if self.agent_type == AgentType.PRICING: + self.underlying_env = PricingEnv(self.config, DQN=False, charging_hub=None, env=None) + elif self.agent_type == AgentType.CHARGING: + self.underlying_env = ChargingHubInvestmentEnv(self.config, charging_hub=None, env=None) + elif self.agent_type == AgentType.STORAGE: + self.underlying_env = StorageEnv(self.config, charging_hub=None, env=None) + else: + raise ValueError(f"Unknown agent type: {self.agent_type}") + + def _set_spaces(self): + """Set observation and action spaces based on the underlying environment.""" + # Use the underlying environment's spaces + self.observation_space = self.underlying_env.observation_space + self.action_space = self.underlying_env.action_space + + def set_simulation_context(self, charging_hub: Any, sim_env: Any): + """ + Set the simulation context (charging hub and environment). + + This method allows the gym environment to be connected to the actual + simulation without tight coupling. + + Args: + charging_hub: The charging hub object + sim_env: The simulation environment + """ + self.charging_hub = charging_hub + self.sim_env = sim_env + self.underlying_env.charging_hub = charging_hub + self.underlying_env.env = sim_env + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]: + """ + Reset the environment to initial state. + + Args: + seed: Random seed for reproducibility + options: Additional options for reset + + Returns: + Tuple of (observation, info) + """ + super().reset(seed=seed) + + # Reset underlying environment + if hasattr(self.underlying_env, 'reset'): + self.current_state = self.underlying_env.reset() + else: + # Fallback: get initial state + self.current_state = self._get_state() + + # Reset episode tracking + self.episode_step = 0 + self.done = False + self.current_reward = 0.0 + self.info = {} + + return self.current_state, self.info + + def step(self, action: Union[np.ndarray, int]) -> Tuple[np.ndarray, float, bool, bool, Dict]: + """ + Take a step in the environment. + + Args: + action: The action to take (numpy array or int) + + Returns: + Tuple of (observation, reward, terminated, truncated, info) + """ + if self.charging_hub is None or self.sim_env is None: + raise RuntimeError("Simulation context not set. 
Call set_simulation_context() first.") + + # Store current action + self.current_action = action + + # Apply action based on agent type + reward = self._apply_action(action) + + # Get new state with error handling + try: + next_state = self._get_state() + except Exception as e: + # If we can't get the state (e.g., simulation ended), return zeros and mark as terminated + print(f"Could not get state: {e}. Marking episode as terminated.") + next_state = np.zeros(self.observation_space.shape[0]) + terminated = True + truncated = False + self.done = True + info = { + "agent_type": self.agent_type.value, + "episode_step": self.episode_step, + "action": action, + "reward": reward, + "error": str(e) + } + return next_state, reward, terminated, truncated, info + + # Update state + self.current_state = next_state + self.current_reward = reward + self.episode_step += 1 + + # Check if episode is done + terminated = self._is_episode_done() + truncated = self.episode_step >= self.max_episode_steps + self.done = terminated or truncated + + # Prepare info + info = { + "agent_type": self.agent_type.value, + "episode_step": self.episode_step, + "action": action, + "reward": reward + } + + return next_state, reward, terminated, truncated, info + + def _get_state(self) -> np.ndarray: + """Get the current state from the underlying environment.""" + if hasattr(self.underlying_env, 'get_state'): + return self.underlying_env.get_state(self.charging_hub, self.sim_env) + else: + # Fallback: return zeros if no get_state method + return np.zeros(self.observation_space.shape[0]) + + def _apply_action(self, action: Union[np.ndarray, int]) -> float: + """ + Apply the action and return the reward. + + Args: + action: The action to apply + + Returns: + The reward received + """ + if self.agent_type == AgentType.PRICING: + return self._apply_pricing_action(action) + elif self.agent_type == AgentType.CHARGING: + return self._apply_charging_action(action) + elif self.agent_type == AgentType.STORAGE: + return self._apply_storage_action(action) + else: + raise ValueError(f"Unknown agent type: {self.agent_type}") + + def _apply_pricing_action(self, action: Union[np.ndarray, int]) -> float: + """Apply pricing action and return reward.""" + # Store action in the charging hub's pricing agent if it exists + if hasattr(self.charging_hub, 'pricing_agent') and self.charging_hub.pricing_agent is not None: + self.charging_hub.pricing_agent.action = action + + # Apply the pricing action to the pricing parameters for continuous pricing + if hasattr(self.charging_hub, 'operator'): + try: + # Get the pricing mode from configuration + from resources.configuration.configuration import Configuration + config = Configuration.instance() + pricing_mode = getattr(config, 'pricing_mode', 'Continuous') + + if pricing_mode == "Continuous": + # For continuous pricing, apply the action to pricing_parameters + if hasattr(self.charging_hub.operator, 'pricing_parameters'): + # Apply action to pricing parameters: + # pricing_parameters[0] = fixed term (p_0) - energy price component + # pricing_parameters[1] = rate-based term (alpha) - capacity price component + if len(action) >= 2: + self.charging_hub.operator.pricing_parameters[0] = action[0] # Fixed term (p_0) + self.charging_hub.operator.pricing_parameters[1] = action[1] # Rate-based term (alpha) + elif len(action) == 1: + self.charging_hub.operator.pricing_parameters[0] = action[0] # Fixed term (p_0) + + # For continuous pricing, we don't use price_pairs directly + # Instead, vehicles calculate their 
own price using the price_function: + # price = p_0 + alpha * power^degree + print(f"Applied continuous pricing action {action} to pricing_parameters: {self.charging_hub.operator.pricing_parameters}") + + elif pricing_mode == "Discrete": + # For discrete pricing, apply to price_pairs + if hasattr(self.charging_hub.operator, 'price_pairs'): + from utilities.rl_environments.rl_pricing_env import convert_to_vector + if isinstance(action, (int, np.integer)): + vector_prices = convert_to_vector(action) + for i, price in enumerate(vector_prices): + if i < len(self.charging_hub.operator.price_pairs): + self.charging_hub.operator.price_pairs[i, 1] = price + else: + # Action is already a vector + for i, price in enumerate(action): + if i < len(self.charging_hub.operator.price_pairs): + self.charging_hub.operator.price_pairs[i, 1] = price + print(f"Applied discrete pricing action {action} to price_pairs: {self.charging_hub.operator.price_pairs}") + + except Exception as e: + print(f"Failed to apply pricing action: {e}") + + # Advance the simulation by one planning interval (typically 60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _apply_charging_action(self, action: np.ndarray) -> float: + """Apply charging action and return reward.""" + # Store action in the charging hub's charging agent if it exists + if hasattr(self.charging_hub, 'charging_agent') and self.charging_hub.charging_agent is not None: + self.charging_hub.charging_agent.action = action + + # Advance the simulation by one planning interval (typically 60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _apply_storage_action(self, action: np.ndarray) -> float: + """Apply storage action and return reward.""" + # Store action in the charging hub's storage agent if it exists + if hasattr(self.charging_hub, 'storage_agent') and self.charging_hub.storage_agent is not None: + self.charging_hub.storage_agent.action = action + + # Advance the simulation by one planning interval (typically 
60 minutes) + planning_interval = getattr(self.charging_hub, 'planning_interval', 60) + current_time = self.sim_env.now + next_time = current_time + planning_interval + + # Run the simulation until the next time step + try: + self.sim_env.run(until=next_time) + except Exception as e: + # If simulation fails (e.g., data not available), mark as terminated + print(f"Simulation ended at time {self.sim_env.now} due to data limits") + return 0.0 # Return neutral reward + + # Choose reward calculation method based on configuration + if self.config.use_comprehensive_rewards: + # Use comprehensive reward calculation that consolidates simulation-based logic + reward = self._calculate_comprehensive_reward() + else: + # Use original simulation-based reward calculation + reward = self._calculate_simulation_based_reward() + return reward + + def _is_episode_done(self) -> bool: + """Check if the episode is done.""" + if hasattr(self.underlying_env, 'done'): + return self.underlying_env.done + else: + # Fallback: episode is never done by default + return False + + def render(self, mode: str = "human"): + """Render the environment (placeholder for now).""" + if mode == "human": + print(f"EVCH Environment - Agent: {self.agent_type.value}, Step: {self.episode_step}") + return None + + def close(self): + """Close the environment.""" + if hasattr(self.underlying_env, 'close'): + self.underlying_env.close() + + def _calculate_comprehensive_reward(self) -> float: + """ + Calculate comprehensive reward by moving simulation-based reward logic to the RL environment. + + This method consolidates reward calculation that was previously scattered across + simulation components, making the RL environment more self-contained. + + Returns: + float: Calculated reward value + """ + if not self.charging_hub or not hasattr(self.charging_hub, 'operator'): + return 0.0 + + try: + reward = 0.0 + operator = self.charging_hub.operator + + # 1. GRID USAGE AND PEAK PENALTIES + current_grid_usage = max(self.charging_hub.grid.grid_usage) if self.charging_hub.grid.grid_usage else 0 + current_peak_threshold = operator.peak_threshold + + # Peak threshold violation penalty (from operator.reward_computing) + if current_grid_usage > current_peak_threshold: + peak_penalty = (current_grid_usage - current_peak_threshold) * operator.peak_cost + reward -= peak_penalty + # Update peak threshold (as done in simulation) + operator.peak_threshold = current_grid_usage + + # 2. 
OBJECTIVE FUNCTION-BASED REWARD (from simulation model) + # Calculate revenue and costs similar to simulation model + total_revenue = 0.0 + total_energy_costs = self.charging_hub.grid.energy_costs if hasattr(self.charging_hub.grid, 'energy_costs') else 0.0 + + # Calculate revenue from served vehicles + requests = [r for r in operator.requests if r.ev == 1] + for request in requests: + if request.is_assigned and request.energy_requested > 0: + # Revenue from energy charged + energy_charged = min(request.energy_charged, request.energy_requested) + total_revenue += energy_charged * request.charging_price + + # Revenue from parking fees + total_revenue += request.park_duration * request.parking_fee + + # Penalty for missed energy (from simulation model) + energy_missed = max(request.energy_requested - request.energy_charged, 0) + if energy_missed > 0: + from resources.configuration.configuration import Configuration + missed_penalty = energy_missed * request.charging_price * Configuration.instance().energy_missed_penalty + total_revenue -= missed_penalty + + # Calculate operational costs + operational_costs = total_energy_costs + + # Peak charges (if applicable) + if hasattr(self.charging_hub, 'prices') and 'peak' in self.charging_hub.prices: + peak_charge = max((current_grid_usage - operator.peak_threshold) * self.charging_hub.prices['peak'], 0) + operational_costs += peak_charge + + # 3. OBJECTIVE FUNCTION CALCULATION (from simulation model) + if hasattr(self.charging_hub, 'objective'): + if self.charging_hub.objective == "min_costs": + # Calculate missed energy penalty + total_energy_missed = sum( + max(r.energy_requested - r.energy_charged, 0) + for r in requests if r.energy_requested > 0 + ) + missed_penalty = total_energy_missed * getattr(self.charging_hub, 'penalty_for_missed_kWh', 1.0) + objective_value = missed_penalty + operational_costs + elif self.charging_hub.objective == "max_profits": + objective_value = total_revenue - operational_costs + else: + objective_value = total_revenue - operational_costs + else: + objective_value = total_revenue - operational_costs + + # 4. OBJECTIVE FUNCTION CHANGE REWARD (from operator.reward_computing) + if hasattr(operator, 'objective'): + objective_change = objective_value - operator.objective + reward -= objective_change # Negative because we want to minimize costs/maximize profits + operator.objective = objective_value + else: + operator.objective = objective_value + + # 5. SERVICE LEVEL REWARD (from simulation model) + served_requests = sum(1 for r in requests if r.energy_requested > 0 and r.energy_charged > 0) + total_requests = sum(1 for r in requests if r.energy_requested > 0) + if total_requests > 0: + service_level = served_requests / total_requests + service_reward = service_level * 0.5 # Reward for good service level + reward += service_reward + + # 6. 
PRICING-SPECIFIC REWARDS (existing logic) + if self.agent_type == AgentType.PRICING and hasattr(operator, 'pricing_parameters'): + pricing_params = operator.pricing_parameters + if len(pricing_params) >= 2: + base_price = pricing_params[0] + capacity_price = pricing_params[1] + + # Count rejected vehicles due to pricing + rejected_vehicles = sum(1 for r in requests + if r.ev == 1 and r.energy_requested == 0 and r.charging_price > 0) + + # Strong penalty for pricing that causes vehicle rejections + if rejected_vehicles > 0: + rejection_penalty = rejected_vehicles * 0.5 + reward -= rejection_penalty + + # Penalty for extreme pricing + if base_price > 1.2 or base_price < 0.3: + reward -= 0.3 + elif 0.4 <= base_price <= 1.0: + reward += 0.3 + + if capacity_price > 0.3 or capacity_price < 0.01: + reward -= 0.2 + elif 0.02 <= capacity_price <= 0.2: + reward += 0.2 + + # 7. GRID EFFICIENCY REWARD (existing logic) + if current_grid_usage <= current_peak_threshold: + efficiency_ratio = current_grid_usage / current_peak_threshold + if 0.7 <= efficiency_ratio <= 0.9: + reward += 0.3 + elif efficiency_ratio < 0.3: + reward -= 0.1 + elif efficiency_ratio > 0.95: + reward -= 0.2 + + # 8. VEHICLE SERVICE REWARD (existing logic) + active_vehicles = sum(1 for r in requests if r.mode in ["Connected", "Charging"]) + if active_vehicles > 0: + service_reward = min(active_vehicles * 0.3, 2.0) + reward += service_reward + + # 9. COMPLETION REWARD (existing logic) + completed_vehicles = sum(1 for r in requests if r.mode in ["Fully_charged", "Left"]) + completion_reward = completed_vehicles * 0.1 + reward += completion_reward + + # 10. TIME-BASED REWARD (existing logic) + time_factor = (self.sim_env.now % 1440) / 1440 + time_reward = 0.1 * time_factor + reward += time_reward + + return reward + + except Exception as e: + print(f"Comprehensive reward calculation failed: {e}") + return 0.0 + + def _calculate_simulation_based_reward(self) -> float: + """ + Calculate reward using the original simulation-based approach. + + This method maintains backward compatibility with the existing + simulation-based reward calculation logic. + + Returns: + float: Calculated reward value + """ + # This method contains the original reward calculation logic + # that was previously in the pricing action method + if hasattr(self.charging_hub, 'operator'): + try: + # Get current state metrics + current_grid_usage = max(self.charging_hub.grid.grid_usage) if self.charging_hub.grid.grid_usage else 0 + current_peak_threshold = self.charging_hub.operator.peak_threshold + + # Count active vehicles (charging or connected) + active_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.mode in ["Connected", "Charging"] and request.ev == 1) + + # Count completed vehicles (fully charged or left) + completed_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.mode in ["Fully_charged", "Left"] and request.ev == 1) + + # Count vehicles that rejected charging due to high prices + rejected_vehicles = sum(1 for request in self.charging_hub.operator.requests + if request.ev == 1 and request.energy_requested == 0 and request.charging_price > 0) + + # Calculate dynamic reward components + reward = 0.0 + + # 1. Grid usage penalty (negative reward for exceeding peak threshold) + if current_grid_usage > current_peak_threshold: + penalty = (current_grid_usage - current_peak_threshold) * 0.1 + reward -= penalty + + # 2. 
Dynamic service reward based on current pricing action + if hasattr(self.charging_hub.operator, 'pricing_parameters'): + pricing_params = self.charging_hub.operator.pricing_parameters + if len(pricing_params) >= 2: + # Reward for optimal pricing (not too high, not too low) + base_price = pricing_params[0] + capacity_price = pricing_params[1] + + # Strong penalty for pricing that causes vehicle rejections + if rejected_vehicles > 0: + rejection_penalty = rejected_vehicles * 0.5 # Strong penalty per rejected vehicle + reward -= rejection_penalty + + # Penalty for extreme pricing + if base_price > 1.2 or base_price < 0.3: + reward -= 0.3 + elif 0.4 <= base_price <= 1.0: + reward += 0.3 # Reward for reasonable pricing + + if capacity_price > 0.3 or capacity_price < 0.01: + reward -= 0.2 + elif 0.02 <= capacity_price <= 0.2: + reward += 0.2 # Reward for reasonable capacity pricing + + # 3. Grid efficiency reward (varies based on usage) + if current_grid_usage <= current_peak_threshold: + # Reward for efficient grid usage (closer to threshold = better) + efficiency_ratio = current_grid_usage / current_peak_threshold + if 0.7 <= efficiency_ratio <= 0.9: + reward += 0.3 # Sweet spot for efficiency + elif efficiency_ratio < 0.3: + reward -= 0.1 # Too low usage + elif efficiency_ratio > 0.95: + reward -= 0.2 # Too close to limit + + # 4. Vehicle service reward (varies based on demand) + if active_vehicles > 0: + # Reward for serving vehicles, but with diminishing returns + service_reward = min(active_vehicles * 0.3, 2.0) # Cap at 2.0 + reward += service_reward + + # 5. Completion reward (small incremental reward) + completion_reward = completed_vehicles * 0.1 # Smaller reward per completion + reward += completion_reward + + # 6. Time-based reward variation (encourage progress) + time_factor = (self.sim_env.now % 1440) / 1440 # Normalize to 0-1 over day + time_reward = 0.1 * time_factor # Small time-based reward + reward += time_reward + + return reward + + except Exception as e: + # If reward computation fails, return neutral reward + print(f"Simulation-based reward computation failed: {e}") + return 0.0 + else: + # Fallback: return 0 reward + return 0.0 + + +# Factory functions for easy environment creation +def make_pricing_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a pricing environment.""" + config = EVCHConfig(agent_type=AgentType.PRICING, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +def make_charging_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a charging environment.""" + config = EVCHConfig(agent_type=AgentType.CHARGING, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +def make_storage_env(config_dict: Dict[str, Any], **kwargs) -> EVCHGymEnv: + """Create a storage environment.""" + config = EVCHConfig(agent_type=AgentType.STORAGE, **config_dict) + return EVCHGymEnv(config, **kwargs) + + +# Register environments with gym (optional, for gym.make() support) +try: + from gym.envs.registration import register + + register( + id='EVCH-Pricing-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_pricing_env', + ) + + register( + id='EVCH-Charging-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_charging_env', + ) + + register( + id='EVCH-Storage-v0', + entry_point='utilities.rl_environments.evch_gym_env:make_storage_env', + ) + +except ImportError: + # gym registration not available + pass diff --git a/utilities/rl_environments/rl_pricing_env.py b/utilities/rl_environments/rl_pricing_env.py index 
2660911..c7c6260 100644 --- a/utilities/rl_environments/rl_pricing_env.py +++ b/utilities/rl_environments/rl_pricing_env.py @@ -3,409 +3,627 @@ import numpy as np import logging import pandas as pd +from typing import Optional, Dict, Any, Tuple, Union +from dataclasses import dataclass from resources.configuration.configuration import Configuration -k = 5 + +@dataclass +class PricingState: + """Data class for pricing environment state information.""" + storage_soc: float + pv_generation: float + electricity_price: float + peak_usage: float + avg_energy_demand: float + avg_power_demand: float + free_grid_capacity: float + + +@dataclass +class PricingConfig: + """Data class for pricing environment configuration.""" + number_power_options: int + maximum_power: float + evaluation: bool + pricing_mode: str + dynamic_fix_term_pricing: bool + capacity_pricing: bool + dynamic_parking_fee: bool + limiting_grid_capa: bool + dynamic_storage_scheduling: bool class PricingEnv(gym.Env): + """ + Gym environment for dynamic pricing in EV charging hub operations. + + This environment provides a standardized interface for RL agents to learn + optimal pricing strategies for EV charging services. + """ + metadata = {"render.modes": ["human"]} reward_range = (-float("inf"), float("inf")) spec = None - - def __init__(self, config, DQN=False): - # Set these in ALL subclasses - self.final_action_DQN = None - if DQN == True: - # self.action_space = spaces.Discrete(2*2*2*4*4*5*2*3*3) - self.action_space = spaces.Discrete(k**config.number_power_options) - else: - number_of_actions = config.number_power_options - 1 - if ( - Configuration.instance().dynamic_fix_term_pricing - and Configuration.instance().capacity_pricing - ): - number_of_actions = config.number_power_options - if Configuration.instance().dynamic_parking_fee: - number_of_actions = config.number_power_options - if Configuration.instance().limiting_grid_capa: - number_of_actions = config.number_power_options - if Configuration.instance().dynamic_storage_scheduling: - number_of_actions = config.number_power_options - self.action_space = spaces.Box( - low=0, - high=config.maximum_power, - shape=(number_of_actions,), - dtype=np.float64, - ) - if Configuration.instance().pricing_mode == "Discrete": - # action_size = config.number_power_options - action_size = 2 - self.action_space = spaces.Box( - low=0, - high=config.maximum_power, - shape=(action_size,), - dtype=np.float64, - ) - self.action_space.low[0] = 0.3 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0.5 - self.action_space.high[1] = 1.5 - # TODO: hard coded - if config.number_power_options >= 3: - self.action_space.low[2], self.action_space.high[2] = 300, 800 - if config.number_power_options >= 4: - self.action_space.low[3], self.action_space.high[3] = -200, 200 - - if Configuration.instance().pricing_mode == "Continuous": - self.action_space.low[0] = 0 - self.action_space.high[0] = 1.5 - if Configuration.instance().limiting_grid_capa: - self.action_space.low[1] = 300 - self.action_space.high[1] = 600 - if Configuration.instance().dynamic_storage_scheduling: - self.action_space.low[1] = -200 - self.action_space.high[1] = +200 - if ( - Configuration.instance().dynamic_fix_term_pricing - and Configuration.instance().capacity_pricing - ): - self.action_space.low[0] = 0.5 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0 - self.action_space.high[1] = 0.4 - if ( - Configuration.instance().dynamic_fix_term_pricing - and not Configuration.instance().capacity_pricing - ): - 
self.action_space.low[0] = 0.6 - self.action_space.high[0] = 1.5 - if Configuration.instance().dynamic_parking_fee: - self.action_space.low[1] = 0 - self.action_space.high[1] = 1 / 60 - # self.action_space.low[1] = 0.01 - # self.action_space.high[0] = 0.2 - # self.action_space.high[1] = 0.03 - - # self.observation_space = spaces.Box(low=0, high=1000000, shape= - # (config.number_chargers * 3 + 2 + 4, ), dtype=np.float64) - observation_shape = 2 + 3 + 2 - if Configuration.instance().dynamic_storage_scheduling: - observation_shape += 1 - self.observation_space = spaces.Box( - low=0, high=1000000, shape=(observation_shape,), dtype=np.float64 - ) - self.charging_hub = None - self.env = None - self.id = 1 - self.episode = 0 - self.current_step = 0 - self.reward = 0 - self._max_episode_steps = 50000000 + + # Constants + K = 5 # Base for discrete action encoding + MAX_EPISODE_STEPS = 50000000 + + def __init__(self, config: Any, DQN: bool = False, charging_hub: Optional[Any] = None, env: Optional[Any] = None): + """ + Initialize the pricing environment. + + Args: + config: Configuration object containing environment parameters + DQN: Whether to use discrete action space for DQN + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + """ + super().__init__() + self.config = config self.evaluation = config.evaluation - self.total_reward = dict( - missed=0, feasibility=0, energy=0, feasibility_storage=0, test=0 - ) - self.config = config + self.is_dqn = DQN + + # Single reward calculation approach + # No longer using comprehensive rewards - keeping it simple + + # Initialize action and observation spaces + self._init_action_space() + self._init_observation_space() + + # Environment state + self.charging_hub = charging_hub + self.env = env + self.current_step = 0 + self.reward = 0.0 self.action = None + self.final_action_DQN = None + + # Episode tracking + self.episode = 0 + self.total_reward = { + "profit": 0, + "feasibility": 0, + "energy": 0, + "feasibility_storage": 0, + "test": 0 + } + + # Action range for continuous actions if not DQN: self.action_range = [self.action_space.low, self.action_space.high] - - def rescale_action(self, action): + + def _init_action_space(self) -> None: + """Initialize the action space based on configuration.""" + if self.is_dqn: + self._init_discrete_action_space() + else: + self._init_continuous_action_space() + + def _init_discrete_action_space(self) -> None: + """Initialize discrete action space for DQN.""" + self.action_space = spaces.Discrete(self.K ** self.config.number_power_options) + + def _init_continuous_action_space(self) -> None: + """Initialize continuous action space for other algorithms.""" + number_of_actions = self._calculate_number_of_actions() + + self.action_space = spaces.Box( + low=0, + high=self.config.maximum_power, + shape=(number_of_actions,), + dtype=np.float64 + ) + + self._configure_action_space_bounds() + + def _calculate_number_of_actions(self) -> int: + """Calculate the number of actions based on configuration.""" + config = Configuration.instance() + number_of_actions = self.config.number_power_options - 1 + + # Adjust based on pricing features + if config.dynamic_fix_term_pricing and config.capacity_pricing: + number_of_actions = self.config.number_power_options + if config.dynamic_parking_fee: + number_of_actions = self.config.number_power_options + if config.limiting_grid_capa: + number_of_actions = self.config.number_power_options + if config.dynamic_storage_scheduling: + number_of_actions = 
self.config.number_power_options + + return number_of_actions + + def _configure_action_space_bounds(self) -> None: + """Configure action space bounds based on pricing mode.""" + config = Configuration.instance() + + if config.pricing_mode == "Discrete": + self._configure_discrete_mode_bounds() + elif config.pricing_mode == "Continuous": + self._configure_continuous_mode_bounds() + + def _configure_discrete_mode_bounds(self) -> None: + """Configure bounds for discrete pricing mode.""" + action_size = 2 # Default for discrete mode + self.action_space = spaces.Box( + low=0, + high=self.config.maximum_power, + shape=(action_size,), + dtype=np.float64 + ) + + # Set specific bounds for discrete pricing + self.action_space.low[0] = 0.3 + self.action_space.high[0] = 1.5 + self.action_space.low[1] = 0.5 + self.action_space.high[1] = 1.5 + + # Handle additional power options + if self.config.number_power_options >= 3: + self.action_space.low[2] = 300 + self.action_space.high[2] = 800 + if self.config.number_power_options >= 4: + self.action_space.low[3] = -200 + self.action_space.high[3] = 200 + + def _configure_continuous_mode_bounds(self) -> None: + """Configure bounds for continuous pricing mode.""" + config = Configuration.instance() + + # Base bounds + self.action_space.low[0] = 0 + self.action_space.high[0] = 1.5 + + # Adjust based on features + if config.limiting_grid_capa: + self.action_space.low[1] = 300 + self.action_space.high[1] = 600 + if config.dynamic_storage_scheduling: + self.action_space.low[1] = -200 + self.action_space.high[1] = 200 + if config.dynamic_fix_term_pricing and config.capacity_pricing: + self.action_space.low[0] = 0.5 + self.action_space.high[0] = 1.5 + self.action_space.low[1] = 0 + self.action_space.high[1] = 0.4 + if config.dynamic_fix_term_pricing and not config.capacity_pricing: + self.action_space.low[0] = 0.6 + self.action_space.high[0] = 1.5 + if config.dynamic_parking_fee: + self.action_space.low[1] = 0 + self.action_space.high[1] = 1 / 60 + + def _init_observation_space(self) -> None: + """Initialize the observation space.""" + observation_shape = self._calculate_observation_shape() + + self.observation_space = spaces.Box( + low=0, + high=1000000, + shape=(observation_shape,), + dtype=np.float64 + ) + + def _calculate_observation_shape(self) -> int: + """Calculate the observation space shape.""" + base_shape = 2 + 3 + 2 # Time encoding + base features + demand features + + if Configuration.instance().dynamic_storage_scheduling: + base_shape += 1 + + return base_shape + + def rescale_action(self, action: np.ndarray) -> np.ndarray: + """ + Rescale action from normalized range to actual range. + + Args: + action: Normalized action from agent + + Returns: + Rescaled action in actual range + """ return ( action * (self.action_range[1] - self.action_range[0]) / 2.0 + (self.action_range[1] + self.action_range[0]) / 2.0 ) - - def get_final_prices_DQN(self, actions): - + + def get_final_prices_DQN(self, actions: np.ndarray) -> np.ndarray: + """ + Convert DQN actions to final pricing values. 
+ + Args: + actions: Raw actions from DQN agent + + Returns: + Final pricing values + """ final_action = actions.copy() + if len(actions) == 1: - for i in range(len(actions)): - final_action[i] = actions[i] * 0.1 + 0.3 - if len(actions) == 2: + final_action[0] = actions[0] * 0.1 + 0.3 + elif len(actions) == 2: for i in range(len(actions)): final_action[i] = actions[i] * 0.1 + 0.4 * i + 0.2 * (1 - i) + self.final_action_DQN = final_action return final_action - - # def get_state(self, charging_hub=None, env=None): - # state = np.array([]) - # if not env: - # hour = 0 - # hour = np.array(hour) - # # hour = np.eye(24)[hour] - # - # normalized_hour = hour / 24 / 4 - # - # # Map normalized hour to angle in radians - # angle = normalized_hour * 2 * np.pi - # - # # Encode angle using sinusoidal functions - # sin_encoding = np.sin(angle) - # cos_encoding = np.cos(angle) - # day = 0 - # day = np.array(day) - # day = np.eye(5)[day] - # else: - # hour = (env.now%1440 - env.now%charging_hub.planning_interval) / charging_hub.planning_interval - # hour = np.array(int(hour)) - # normalized_hour = hour / 24 / (60/charging_hub.planning_interval) - # - # # Map normalized hour to angle in radians - # angle = normalized_hour * 2 * np.pi - # - # # Encode angle using sinusoidal functions - # sin_encoding = np.sin(angle) - # cos_encoding = np.cos(angle) - # # hour = np.eye(24)[hour] - # - # day = (env.now - env.now % 1440)/1440 - # day = np.array(int(day)) - # day = np.eye(5)[day] - # state = np.append(state, np.array([sin_encoding, cos_encoding])) - # - # # state = np.append(state, np.array([day])) - # if not charging_hub: - # storage_SoC = 0 - # free_grid_capa = 0 - # PV = 0 - # electricity_price = 0 - # peak_usage = 0 - # avg_energy_demand = 0 - # avg_power_demand = 0 - # state = np.append(state, np.array([free_grid_capa, PV, electricity_price, peak_usage])) - # for i in range(self.config.number_chargers): - # for _ in range(4): - # energy_demand = 0 - # charging_id = 0 - # # Time of Departure - # ToD = 0 - # state = np.append(state, np.array([energy_demand, ToD, charging_id])) - # else: - # storage_SoC = charging_hub.electric_storage.SoC - # PV = charging_hub.operator.generation_min - # hour = (env.now % 1440 - env.now % 60) / 60 - # electricity_price = charging_hub.electricity_tariff[int(hour)] - # peak_usage = charging_hub.operator.peak_threshold - # avg_energy_demand = 0 - # avg_power_demand = 0 - # if charging_hub.operator.free_grid_capa_actual == 0: - # free_grid_capa = charging_hub.operator.free_grid_capa_actual - # else: - # free_grid_capa = charging_hub.operator.free_grid_capa_actual[0] - # - # state = np.append(state, np.array([free_grid_capa / 1000, PV / 10, electricity_price, peak_usage / 500])) - # - # for charger in charging_hub.chargers: - # vehicles = charger.connected_vehicles - # charger_state = np.zeros(charger.number_of_connectors * 3) - # for j in range(len(vehicles)): - # charger_state[j * 3 + 0] = vehicles[j].remaining_energy_deficit / 50 - # charger_state[j * 3 + 1] = vehicles[j].remaining_park_duration / 1000 - # charger_state[j * 3 + 2] = vehicles[j].charging_price / 4 - # state = np.append(state, charger_state) - # - # return state - - def get_state(self, charging_hub=None, env=None): + + def get_state(self, charging_hub: Optional[Any] = None, env: Optional[Any] = None) -> np.ndarray: + """ + Get the current state of the environment. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + State vector as numpy array + """ state = np.array([]) + + # Add time encoding + time_encoding = self._get_time_encoding(charging_hub, env) + state = np.append(state, time_encoding) + + # Add system state + system_state = self._get_system_state(charging_hub, env) + state = np.append(state, system_state) + + return state + + def _get_time_encoding(self, charging_hub: Optional[Any], env: Optional[Any]) -> np.ndarray: + """ + Get time encoding for the current state. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + Time encoding vector + """ if not env: - hour = 0 - hour = np.array(hour) - # hour = np.eye(24)[hour] - - normalized_hour = hour / 24 / 4 - - # Map normalized hour to angle in radians - angle = normalized_hour * 2 * np.pi - - # Encode angle using sinusoidal functions - sin_encoding = np.sin(angle) - cos_encoding = np.cos(angle) - day = 0 - day = np.array(day) - day = np.eye(5)[day] + return self._get_default_time_encoding() else: - hour = ( - env.now % 1440 - env.now % charging_hub.planning_interval - ) / charging_hub.planning_interval - hour = np.array(int(hour)) - normalized_hour = hour / 24 / (60 / charging_hub.planning_interval) - - # Map normalized hour to angle in radians - angle = normalized_hour * 2 * np.pi - - # Encode angle using sinusoidal functions - sin_encoding = np.sin(angle) - cos_encoding = np.cos(angle) - # hour = np.eye(24)[hour] - - day = (env.now - env.now % 1440) / 1440 - day = np.array(int(day)) - day = np.eye(5)[day] - state = np.append(state, np.array([sin_encoding, cos_encoding])) - - # state = np.append(state, np.array([day])) + return self._get_simulation_time_encoding(charging_hub, env) + + def _get_default_time_encoding(self) -> np.ndarray: + """Get default time encoding when no simulation is running.""" + hour = 0 + normalized_hour = hour / 24 / 4 + angle = normalized_hour * 2 * np.pi + + sin_encoding = np.sin(angle) + cos_encoding = np.cos(angle) + + return np.array([sin_encoding, cos_encoding]) + + def _get_simulation_time_encoding(self, charging_hub: Any, env: Any) -> np.ndarray: + """ + Get time encoding from simulation. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + Time encoding vector + """ + hour = ( + env.now % 1440 - env.now % charging_hub.planning_interval + ) / charging_hub.planning_interval + hour = int(hour) + + normalized_hour = hour / 24 / (60 / charging_hub.planning_interval) + angle = normalized_hour * 2 * np.pi + + sin_encoding = np.sin(angle) + cos_encoding = np.cos(angle) + + return np.array([sin_encoding, cos_encoding]) + + def _get_system_state(self, charging_hub: Optional[Any], env: Optional[Any]) -> np.ndarray: + """ + Get system state information. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + System state vector + """ if not charging_hub: - storage_SoC = 0 - free_grid_capa = 0 - PV = 0 - electricity_price = 0 - peak_usage = 0 - avg_energy_demand = 0 - avg_power_demand = 0 - # state = np.append(state, np.array([free_grid_capa, PV, electricity_price, peak_usage, avg_energy_demand, avg_power_demand])) - if Configuration.instance().dynamic_storage_scheduling: - state = np.append( - state, - np.array( - [ - storage_SoC, - PV, - electricity_price, - peak_usage, - avg_energy_demand, - avg_power_demand, - ] - ), - ) - else: - state = np.append( - state, - np.array( - [ - PV, - electricity_price, - peak_usage, - avg_energy_demand, - avg_power_demand, - ] - ), - ) + return self._get_default_system_state() + else: + return self._get_simulation_system_state(charging_hub, env) + + def _get_default_system_state(self) -> np.ndarray: + """Get default system state when no simulation is running.""" + default_state = [0, 0, 0, 0, 0, 0] # All zeros for default + + if Configuration.instance().dynamic_storage_scheduling: + return np.array(default_state) + else: + return np.array(default_state[1:]) # Exclude storage SoC + + def _get_simulation_system_state(self, charging_hub: Any, env: Any) -> np.ndarray: + """ + Get system state from simulation. + + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + System state vector + """ + pricing_state = self._extract_pricing_state(charging_hub, env) + + # Normalize values + normalized_state = [ + pricing_state.storage_soc / 300, + pricing_state.pv_generation / 500, + pricing_state.electricity_price, + pricing_state.peak_usage / 1000, + pricing_state.avg_energy_demand / 1000, + pricing_state.avg_power_demand / 10 + ] + + if Configuration.instance().dynamic_storage_scheduling: + return np.array(normalized_state) else: - storage_SoC = charging_hub.electric_storage.SoC - PV = charging_hub.operator.non_dispatchable_generator.generation_profile_actual.loc[ + return np.array(normalized_state[1:]) # Exclude storage SoC + + def _extract_pricing_state(self, charging_hub: Any, env: Any) -> PricingState: + """ + Extract pricing state from charging hub. 
+ + Args: + charging_hub: Reference to the charging hub + env: Reference to the simulation environment + + Returns: + PricingState object + """ + # Extract storage state + storage_soc = charging_hub.electric_storage.SoC + + # Extract PV generation with error handling + try: + pv_generation = charging_hub.operator.non_dispatchable_generator.generation_profile_actual.loc[ env.now, "pv_generation" ] - hour = (env.now % 1440 - env.now % 60) / 60 - electricity_price = charging_hub.electricity_tariff[int(hour)] - peak_usage = charging_hub.operator.peak_threshold - avg_energy_demand = 0 - avg_power_demand = 0 - if charging_hub.operator.free_grid_capa_actual == 0: - free_grid_capa = charging_hub.operator.free_grid_capa_actual - else: - free_grid_capa = charging_hub.operator.free_grid_capa_actual[0] - - for charger in charging_hub.chargers: - vehicles = charger.connected_vehicles - for j in range(len(vehicles)): - avg_energy_demand += vehicles[j].remaining_energy_deficit - avg_power_demand += ( - vehicles[j].remaining_energy_deficit - / vehicles[j].remaining_park_duration - ) - - # state = np.append(state, np.array([free_grid_capa/1000, PV/500, electricity_price, peak_usage/1000, - # avg_energy_demand/1000, avg_power_demand/10])) - if Configuration.instance().dynamic_storage_scheduling: - state = np.append( - state, - np.array( - [ - storage_SoC / 300, - PV / 500, - electricity_price, - peak_usage / 1000, - avg_energy_demand / 1000, - avg_power_demand / 10, - ] - ), + except KeyError: + # If time index doesn't exist, return 0 (no generation) + pv_generation = 0.0 + + # Extract electricity price + hour = (env.now % 1440 - env.now % 60) / 60 + electricity_price = charging_hub.electricity_tariff[int(hour)] + + # Extract peak usage + peak_usage = charging_hub.operator.peak_threshold + + # Calculate demand metrics + avg_energy_demand, avg_power_demand = self._calculate_demand_metrics(charging_hub) + + # Extract grid capacity + free_grid_capacity = self._extract_grid_capacity(charging_hub) + + return PricingState( + storage_soc=storage_soc, + pv_generation=pv_generation, + electricity_price=electricity_price, + peak_usage=peak_usage, + avg_energy_demand=avg_energy_demand, + avg_power_demand=avg_power_demand, + free_grid_capacity=free_grid_capacity + ) + + def _calculate_demand_metrics(self, charging_hub: Any) -> Tuple[float, float]: + """ + Calculate average energy and power demand. + + Args: + charging_hub: Reference to the charging hub + + Returns: + Tuple of (avg_energy_demand, avg_power_demand) + """ + avg_energy_demand = 0 + avg_power_demand = 0 + + for charger in charging_hub.chargers: + vehicles = charger.connected_vehicles + for vehicle in vehicles: + avg_energy_demand += vehicle.remaining_energy_deficit + avg_power_demand += ( + vehicle.remaining_energy_deficit / vehicle.remaining_park_duration ) + + return avg_energy_demand, avg_power_demand + + def _extract_grid_capacity(self, charging_hub: Any) -> float: + """ + Extract free grid capacity. 
+ + Args: + charging_hub: Reference to the charging hub + + Returns: + Free grid capacity + """ + try: + free_grid_capa = charging_hub.operator.free_grid_capa_actual + if isinstance(free_grid_capa, list): + if len(free_grid_capa) > 0: + return free_grid_capa[0] + else: + return 0.0 else: - state = np.append( - state, - np.array( - [ - PV / 500, - electricity_price, - peak_usage / 1000, - avg_energy_demand / 1000, - avg_power_demand / 10, - ] - ), - ) - - # print(state) - - return state - - def step(self, action, charging_hub=None, env=None): + return free_grid_capa + except Exception as e: + # If we can't access grid capacity, return 0 + return 0.0 + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]: + """ + Take a step in the environment. + + Args: + action: Action to take + + Returns: + Tuple of (observation, reward, done, info) + """ self.current_step += 1 - reward = self._take_action(action, charging_hub, env) - done = self.current_step >= 100000000000000 - obs = self._next_observation(charging_hub, env) + reward = self._take_action(action) + done = self.current_step >= self.MAX_EPISODE_STEPS + obs = self._next_observation() + return obs, reward, done, {} - - def receive_action(self): + + def receive_action(self) -> Optional[np.ndarray]: + """Get the current action.""" return self.action - - def reset(self): - # Reset the state of the environment to an initial state + + def reset(self) -> np.ndarray: + """ + Reset the environment to initial state. + + Returns: + Initial observation + """ self.current_step = 0 self.reward = 0 - # self.state = self.get_state() + if not self.charging_hub: return self.get_state(None, None) return self.get_state(self.charging_hub, self.env) - - def render(self, mode="human", close=False): - print(self.reward) - - def _take_action(self, action, charging_hub=None, env=None): - - reward = 0 - # hour = int((self.env.now % 1440) / 60) - # prices = [0,0] - # DQN_pricing = convert_to_vector(action) - # for i in range(len(DQN_pricing)): - # DQN_pricing[i] = DQN_pricing[i] * 0.2 * (i + 1) - # for i in range(2): - # prices[i] = Configuration.instance().prices[i] - hour / 4 / 20 - # reward -= (prices[i] - DQN_pricing[i])**2 - - reward -= charging_hub.reward["missed"] - # reward -= charging_hub.reward['feasibility_storage'] * 0.1 - self.total_reward["missed"] += reward - - charging_hub.reward["missed"] = 0 - charging_hub.reward["feasibility_storage"] = 0 - charging_hub.reward["feasibility"] = 0 - return reward / 100 - - def _next_observation(self, charging_hub, env): - return self.get_state(charging_hub, env) + + def render(self, mode: str = "human", close: bool = False) -> None: + """Render the environment state.""" + if mode == "human": + print(f"Pricing Environment - Reward: {self.reward}") + + def _take_action(self, action: np.ndarray) -> float: + """ + Execute the action and calculate reward. + + Args: + action: Action to execute + + Returns: + Reward value + """ + # Store the action for the simulation to use + self.action = action + + # Calculate reward using the single reward calculation method + reward = self._calculate_reward(action) + + return reward + + + + def _calculate_reward(self, action: np.ndarray) -> float: + """ + Calculate reward using the simulation-based approach with operator.reward_computing(). + + This method uses the operator's reward computation as the primary source + and includes additional reward components for better learning. 
+ + Args: + action: Action to execute + + Returns: + float: Calculated reward value + """ + reward = 0.0 + + if self.charging_hub: + # 1. OPERATOR REWARD COMPUTING (primary reward source) + # Use the operator's reward_computing method which includes: + # - Peak threshold violation penalties + # - Objective function change rewards + if hasattr(self.charging_hub, 'operator') and self.charging_hub.operator: + try: + operator_reward = self.charging_hub.operator.reward_computing() + reward += operator_reward + except Exception as e: + print(f"Operator reward computing failed: {e}") + + # 2. MISSED-ENERGY PENALTY (accumulated by the simulation under the 'profit' key of charging_hub.reward) + profit = self.charging_hub.reward.get("profit", 0) + reward -= profit + self.total_reward["profit"] += reward + + # Reset rewards for next step + self.charging_hub.reward["profit"] = 0 + self.charging_hub.reward["feasibility_storage"] = 0 + self.charging_hub.reward["feasibility"] = 0 + + # 3. FALLBACK REWARD (ensure non-zero rewards for learning) + if reward == 0.0: + reward = 0.01 + + # Note: step() already advances self.current_step; no extra bookkeeping is needed here + + return reward + + def _next_observation(self) -> np.ndarray: + """ + Get the next observation. + + Returns: + Next observation + """ + return self.get_state(self.charging_hub, self.env) -def convert_to_scalar(a): - # print(a) +def convert_to_scalar(action_vector: np.ndarray) -> int: + """ + Convert action vector to scalar for discrete actions. + + Args: + action_vector: Vector of actions + + Returns: + Scalar action value + """ action = 0 for i in range(2): - action += a[i] * (5) ** (1 - i) - # print(action) + action += action_vector[i] * (5) ** (1 - i) return int(action) -def convert_to_vector(a, h=1): - # print(a) +def convert_to_vector(scalar_action: int, h: int = 1) -> np.ndarray: + """ + Convert scalar action to vector for discrete actions. + + Args: + scalar_action: Scalar action value + h: Height parameter for conversion + + Returns: + Vector of actions + """ action = np.zeros(2) j = 0 + for i in range(2): - action[i] = int((a - a % (k ** (h - j))) / (k ** (h - j))) - a = a % (k ** (h - j)) + action[i] = int((scalar_action - scalar_action % (5 ** (h - j))) / (5 ** (h - j))) + scalar_action = scalar_action % (5 ** (h - j)) j += 1 - # print(action) + return action diff --git a/utilities/training_manager.py b/utilities/training_manager.py new file mode 100644 index 0000000..68ce0b9 --- /dev/null +++ b/utilities/training_manager.py @@ -0,0 +1,161 @@ +""" +Training Manager Module + +This module handles training operations for learnable agents in the EVCC simulation framework. +""" + +from typing import Optional, List +from resources.configuration.configuration import Configuration +from utilities.sim_input_processing import sample_week +from run_simulation import run_single_simulation +import pandas as pd + + +def run_learnable_agent_training(agent_type: str, agent, decision_type: str, config: Configuration) -> None: + """ + Run training for learnable agents (RL agents).
+ + Args: + agent_type: Type of agent (RL_SAC, RL_DQN, RL_DDPG) + agent: The agent instance to train + decision_type: Type of decision (pricing, charging, storage, routing) + config: Configuration instance + """ + print(f"Starting training for {decision_type} agent: {agent_type}") + + # Check if hyperparameter tuning is enabled + enable_hyperparameter_tuning = getattr(config, 'enable_hyperparameter_tuning', False) + + if enable_hyperparameter_tuning and decision_type == "pricing": + print("Hyperparameter tuning enabled - running find_best_parameters()") + from utilities.hyperparameter_tuner import find_best_parameters + find_best_parameters(agent, config) + else: + print("Running standard training...") + run_standard_training(agent, decision_type, config) + + print(f"Training completed for {decision_type} agent: {agent_type}") + + +def run_standard_training(agent, decision_type: str, config: Configuration, return_rewards: bool = False): + """ + Run standard training for the agent. + + Args: + agent: The agent instance to train + decision_type: Type of decision (pricing, charging, storage, routing) + config: Configuration instance + return_rewards: Whether to return reward history for hyperparameter tuning + + Returns: + List of rewards if return_rewards=True, otherwise None + """ + print(f"Running standard training for {decision_type} agent...") + + # Training parameters + NUMBER_EPISODES = 301 + if config.pricing_mode == "perfect_info": + NUMBER_EPISODES = 1 + + training_results = pd.DataFrame([]) + episode = 1 + output = [] + + while episode <= NUMBER_EPISODES: + # Sample training week + START = sample_week( + sim_seasons=config.SIM_SEASON, + summer_start=config.SUMMER_START, + summer_end=config.SUMMER_END, + seed=42, + ) + print(f"Episode {episode}: Training on week starting {START}") + + # Set evaluation mode + evaluation_episodes = 10 + time_to_learn = agent.hyperparameters.get("min_steps_before_learning", 1000) + + if config.evaluation_after_training: + evaluation_episodes = 1 + time_to_learn = 0 + + # Charger configuration + chargers = { + "fast_one": config.facility_size, + "fast_two": 0, + "fast_four": 0, + "slow_one": 0, + "slow_two": 0, + "slow_four": 0, + } + + # Check if evaluation is needed + if (episode % evaluation_episodes == 0 and + hasattr(agent, 'global_step_number') and + agent.global_step_number >= time_to_learn): + + agent.do_evaluation_iterations = True + print(f"Episode {episode}: Running evaluation") + else: + agent.do_evaluation_iterations = False + + # Run simulation + try: + # During training, we don't want to save results every episode + # Only save during evaluation episodes or if explicitly requested + is_evaluation_episode = (episode % evaluation_episodes == 0 and + hasattr(agent, 'global_step_number') and + agent.global_step_number >= time_to_learn) + + # Prepare results parameters only for evaluation episodes + results_params = None + if is_evaluation_episode: + results_params = [f"{getattr(config, 'POST_FIX', 'sim')}", f"state{9}", f"week{episode}"] + + df = run_single_simulation( + charging_agent=None, # Will be set by the simulation + storage_agent=None, # Will be set by the simulation + pricing_agent=agent, + num_charger=chargers, + turn_off_monitoring=False, + turn_on_results=results_params, # Only save results during evaluation + turn_on_plotting=is_evaluation_episode, # Only plot during evaluation + transformer_num=config.TRANSFORMER_NUM, + storage_capa=config.STORAGE_SIZE, + pv_capa=config.PV_INSTALLED_CAPA, + year=9, + start_day=START, + ) + + # 
Update learning rate if supported + if hasattr(agent, 'update_lr'): + agent.update_lr(new_objective=df["profit"], episode=episode) + + # Save training results only during evaluation episodes and if enabled + if (is_evaluation_episode and + not config.evaluation_after_training and + getattr(config, 'save_training_results', False)): + training_results = pd.concat([training_results, df]) + training_results.to_csv( + f'{config.OUTPUT_DATA_PATH}training_results_{agent.config.name}.csv' + ) + + output.append(df["profit"].values[0]) + + # Print progress + if episode % 10 == 0: + print(f"Episode {episode}: Profit = {df['profit'].values[0]:.2f}") + + except Exception as e: + print(f"Error in episode {episode}: {e}") + output.append(0) # Default value on error + + episode += 1 + if hasattr(agent, 'episode_number'): + agent.episode_number += 1 + + print(f"Standard training completed for {decision_type} agent") + + if return_rewards: + return output[9:-1:10][-10:] if len(output) > 20 else output + return None
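
Note on the discrete action encoding: the refactor replaces the old module-level `k = 5` with the hard-coded base 5 inside `convert_to_scalar`/`convert_to_vector` and the `PricingEnv.K` class constant, so the DQN action space `spaces.Discrete(K ** number_power_options)` still treats an action index as two base-5 digits. A minimal round-trip check against the refactored functions (illustrative only, not part of the diff):

import numpy as np
from utilities.rl_environments.rl_pricing_env import convert_to_scalar, convert_to_vector

# 13 = 2 * 5 + 3, so the two base-5 digits are [2, 3]
assert np.array_equal(convert_to_vector(13), np.array([2.0, 3.0]))
assert convert_to_scalar(np.array([2, 3])) == 13

# Exhaustive round trip over the 25 indices of a two-option Discrete(5 ** 2) space
for scalar in range(25):
    assert convert_to_scalar(convert_to_vector(scalar)) == scalar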
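
Note on exercising the new Gym wrappers: the factory functions and the optional `gym.make()` registrations added in `evch_gym_env.py` can be smoke-tested with a random policy along the lines of the sketch below. This is a sketch only: it assumes the repository is importable, that `EVCHConfig` accepts `use_comprehensive_rewards` as a constructor keyword (the attribute is read as `self.config.use_comprehensive_rewards` in this diff, but the constructor itself sits outside these hunks), and that `EVCHGymEnv.step` follows the same 4-tuple Gym API as `PricingEnv.step`.

# Illustrative smoke test; importing the module also runs the optional gym registration
from utilities.rl_environments.evch_gym_env import make_pricing_env

config_dict = {"use_comprehensive_rewards": False}  # assumed constructor keyword; other keys omitted

env = make_pricing_env(config_dict)
# Equivalent, if the optional registration succeeded:
# import gym; env = gym.make("EVCH-Pricing-v0", config_dict=config_dict)

obs = env.reset()
episode_return = 0.0
for _ in range(24):  # roughly one simulated day at a 60-minute planning interval
    action = env.action_space.sample()           # random policy, smoke test only
    obs, reward, done, info = env.step(action)   # assumed 4-tuple (old Gym) API
    episode_return += reward
    if done:
        break
print("episode return:", episode_return)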