Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 1 addition & 32 deletions .github/workflows/publish-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,6 @@ on:
types: [created]

jobs:
test:
name: publish-release
runs-on: "ubuntu-latest"

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
# Install a specific version of uv.
version: "0.6.14"

- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Install ${{ env.package-name }}
run: make install-dev

- name: Store git status
id: status-before
shell: bash
run: |
echo "::set-output name=BEFORE::$(git status --porcelain -b)"

- name: Tests
run: make test

pypi-publish:
name: Upload release to PyPI
runs-on: ubuntu-latest
Expand Down Expand Up @@ -71,4 +40,4 @@ jobs:
run: uv build

- name: Publish package distributions to PyPI
run: uv publish
run: uv publish
35 changes: 21 additions & 14 deletions mighty/configs/algorithm/sac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,43 +7,50 @@ algorithm_kwargs:
# Normalization
normalize_obs: False
normalize_reward: False
rescale_action: True # CRITICAL: Add this! Must be True for MuJoCo

# Network sizes
n_policy_units: 256
soft_update_weight: 0.005
n_policy_units: 256
soft_update_weight: 0.005 # tau in SAC terms

# Replay buffer
replay_buffer_class:
_target_: mighty.mighty_replay.MightyReplay
replay_buffer_kwargs:
capacity: 1e6


# Scheduling & batch-updates
batch_size: 256
learning_starts: 5000
update_every: 1
n_gradient_steps: 1
batch_size: 256
learning_starts: 5000 # Good, matches CleanRL
update_every: 1 # Good, update every step
n_gradient_steps: 1 # Good

# Learning rates
policy_lr: 3e-4
q_lr: 1e-3
alpha_lr: 1e-3
q_lr: 1e-3 # This is correct now (was 3e-4)
alpha_lr: 3e-4 # 3e-4 is better than 1e-3 for alpha

# SAC hyperparameters
gamma: 0.99
alpha: 0.2
auto_alpha: True
target_entropy: -6.0 # -action_dim for HalfCheetah (6 actions)
target_entropy: null # Let it auto-compute as -action_dim

# Network architecture
hidden_sizes: [256, 256] # Explicitly specify
activation: relu
log_std_min: -5
log_std_max: 2

# Policy configuration
policy_class: mighty.mighty_exploration.StochasticPolicy
policy_kwargs:
entropy_coefficient: 0.0
discrete: False

# Remove entropy_coefficient - SAC handles alpha internally

# SAC specific frequencies
policy_frequency: 2 # Delayed policy updates
policy_frequency: 2 # Can also try 1 for even better performance
target_network_frequency: 1 # Update targets every step

# Environment and training configuration
Expand All @@ -55,5 +62,5 @@ max_episode_steps: 1000 # HalfCheetah episode length
eval_frequency: 10000 # More frequent eval for single env
save_frequency: 50000 # Save every 50k steps


# python mighty/run_mighty.py algorithm=sac env=HalfCheetah-v4 num_steps=1e6 num_envs=1
# Command to run:
# python mighty/run_mighty.py algorithm=sac env=HalfCheetah-v4 num_steps=1e6 num_envs=1

This file was deleted.

7 changes: 0 additions & 7 deletions mighty/configs/environment/pufferlib_ocean/memory.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion mighty/configs/environment/pufferlib_ocean/password.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ num_steps: 50_000
env: pufferlib.ocean.password
env_kwargs: {}
env_wrappers: []
num_envs: 1
num_envs: 64
4 changes: 2 additions & 2 deletions mighty/configs/environment/pufferlib_ocean/squared.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
num_steps: 50_000
env: pufferlib.ocean.squared
env_kwargs: {}
env_wrappers: [mighty.utils.wrappers.FlattenVecObs]
num_envs: 1
env_wrappers: [mighty.mighty_utils.wrappers.FlattenVecObs]
num_envs: 64
2 changes: 1 addition & 1 deletion mighty/configs/environment/pufferlib_ocean/stochastic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ num_steps: 50_000
env: pufferlib.ocean.stochastic
env_kwargs: {}
env_wrappers: []
num_envs: 1
num_envs: 64
3 changes: 2 additions & 1 deletion mighty/configs/exploration/ez_greedy.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# @package _global_
algorithm_kwargs:
policy_class: mighty.mighty_exploration.EZGreedy
policy_class: mighty.mighty_exploration.EZGreedy
policy_kwargs: null
51 changes: 0 additions & 51 deletions mighty/configs/ppo_smac.yaml

This file was deleted.

51 changes: 0 additions & 51 deletions mighty/configs/sac_smac.yaml

This file was deleted.

15 changes: 0 additions & 15 deletions mighty/configs/search_space/dqn_rs.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions mighty/configs/search_space/dqn_template.yaml

This file was deleted.

41 changes: 0 additions & 41 deletions mighty/configs/search_space/ppo_rs.yaml

This file was deleted.

9 changes: 0 additions & 9 deletions mighty/configs/search_space/sac_rs.yaml

This file was deleted.

Loading
Loading