automl · TheEimer · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025 · Aug 6, 2025
diff --git a/.github/workflows/publish-release.yaml b/.github/workflows/publish-release.yaml
@@ -13,37 +13,6 @@ on:
     types: [created]
 
 jobs:
-  test:
-    name: publish-release
-    runs-on: "ubuntu-latest"
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          # Install a specific version of uv.
-          version: "0.6.14"
-
-      - name: "Set up Python"
-        uses: actions/setup-python@v5
-        with:
-          python-version-file: "pyproject.toml"
-
-      - name: Install ${{ env.package-name }}
-        run: make install-dev
-
-      - name: Store git status
-        id: status-before
-        shell: bash
-        run: |
-          echo "::set-output name=BEFORE::$(git status --porcelain -b)"
-
-      - name: Tests
-        run: make test
-
   pypi-publish:
     name: Upload release to PyPI
     runs-on: ubuntu-latest
@@ -71,4 +40,4 @@ jobs:
         run: uv build
 
       - name: Publish package distributions to PyPI
-        run: uv publish
+        run: uv publish
diff --git a/mighty/configs/algorithm/sac.yaml b/mighty/configs/algorithm/sac.yaml
@@ -7,43 +7,50 @@ algorithm_kwargs:
   # Normalization
   normalize_obs: False
   normalize_reward: False
+  rescale_action: True  # Must be True for MuJoCo environments
 
   # Network sizes
-  n_policy_units:     256        
-  soft_update_weight: 0.005  
+  n_policy_units: 256        
+  soft_update_weight: 0.005  # tau in SAC terms
 
   # Replay buffer
   replay_buffer_class:
     _target_: mighty.mighty_replay.MightyReplay
   replay_buffer_kwargs:
     capacity: 1e6
 
+
   # Scheduling & batch-updates
-  batch_size:       256
-  learning_starts:  5000
-  update_every:     1
-  n_gradient_steps: 1
+  batch_size: 256
+  learning_starts: 5000  # Matches CleanRL
+  update_every: 1        # Update every step
+  n_gradient_steps: 1    # One gradient step per update
 
   # Learning rates
   policy_lr: 3e-4
-  q_lr:      1e-3  
-  alpha_lr:  1e-3
+  q_lr: 1e-3      # Kept at 1e-3 (an earlier revision used 3e-4)
+  alpha_lr: 3e-4  # 3e-4 is better than 1e-3 for alpha
 
   # SAC hyperparameters
   gamma: 0.99
   alpha: 0.2
   auto_alpha: True
-  target_entropy: -6.0  # -action_dim for HalfCheetah (6 actions)
+  target_entropy: null  # Let it auto-compute as -action_dim
+
+  # Network architecture
+  hidden_sizes: [256, 256]  # Explicitly specify
+  activation: relu
+  log_std_min: -5
+  log_std_max: 2
 
   # Policy configuration
   policy_class: mighty.mighty_exploration.StochasticPolicy
   policy_kwargs:
-    entropy_coefficient: 0.0
     discrete: False
-
+    # entropy_coefficient removed: SAC handles alpha internally
 
   # SAC specific frequencies
-  policy_frequency: 2           # Delayed policy updates
+  policy_frequency: 2           # Delayed policy updates; 1 is also worth trying
   target_network_frequency: 1   # Update targets every step
 
 # Environment and training configuration
@@ -55,5 +62,5 @@ max_episode_steps: 1000       # HalfCheetah episode length
 eval_frequency: 10000         # More frequent eval for single env
 save_frequency: 50000         # Save every 50k steps
 
-
-#  python mighty/run_mighty.py algorithm=sac env=HalfCheetah-v4 num_steps=1e6 num_envs=1
+# Command to run:
+# python mighty/run_mighty.py algorithm=sac env=HalfCheetah-v4 num_steps=1e6 num_envs=1
diff --git a/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml b/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml
diff --git a/mighty/configs/environment/pufferlib_ocean/memory.yaml b/mighty/configs/environment/pufferlib_ocean/memory.yaml
diff --git a/mighty/configs/environment/pufferlib_ocean/password.yaml b/mighty/configs/environment/pufferlib_ocean/password.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.password
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
+num_envs: 64
diff --git a/mighty/configs/environment/pufferlib_ocean/squared.yaml b/mighty/configs/environment/pufferlib_ocean/squared.yaml
@@ -3,5 +3,5 @@
 num_steps: 50_000 
 env: pufferlib.ocean.squared
 env_kwargs: {}
-env_wrappers: [mighty.utils.wrappers.FlattenVecObs]
-num_envs: 1
+env_wrappers: [mighty.mighty_utils.wrappers.FlattenVecObs]
+num_envs: 64
diff --git a/mighty/configs/environment/pufferlib_ocean/stochastic.yaml b/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.stochastic
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
+num_envs: 64
diff --git a/mighty/configs/exploration/ez_greedy.yaml b/mighty/configs/exploration/ez_greedy.yaml
@@ -1,3 +1,4 @@
 # @package _global_
 algorithm_kwargs:
-  policy_class: mighty.mighty_exploration.EZGreedy
+  policy_class: mighty.mighty_exploration.EZGreedy
+  policy_kwargs: null
diff --git a/mighty/configs/ppo_smac.yaml b/mighty/configs/ppo_smac.yaml
diff --git a/mighty/configs/sac_smac.yaml b/mighty/configs/sac_smac.yaml
diff --git a/mighty/configs/search_space/dqn_rs.yaml b/mighty/configs/search_space/dqn_rs.yaml
diff --git a/mighty/configs/search_space/dqn_template.yaml b/mighty/configs/search_space/dqn_template.yaml
diff --git a/mighty/configs/search_space/ppo_rs.yaml b/mighty/configs/search_space/ppo_rs.yaml
diff --git a/mighty/configs/search_space/sac_rs.yaml b/mighty/configs/search_space/sac_rs.yaml