automl · amsks · Nov 30, 2025 · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025
diff --git a/examples/README.md b/examples/README.md
@@ -234,4 +234,9 @@ python mighty/run_mighty.py --config-path=../examples --config-name=hypersweeper
 ## Logging & Plotting
 We have an example notebook that shows you how to load and plot the default Mighty logs. Apart from this, you can also use TensorBoard or W&B for your plotting needs, though for these you should refer to their own documentation.
 You can run this notebook to produce new runs for plotting, use randomly generated example data or load your own data into it.
-For now these examples are focused on single-task learning instead of generalization or multi-task RL, but we plan on expanding them.
+
+We also include an example of how to examine generalization behavior. You can run it with the following command:
+```bash
+sh examples/run_generalization_protocol.sh
+```
+You can also simply inspect the existing runs in the associated [notebook](./plot_carl_generalization_example.ipynb). There you'll see how agents trained on different training context distributions compare when evaluated on a larger test context distribution that varies two context features of CARLCartPole.
diff --git a/...arl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/config.yaml b/...arl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/config.yaml
@@ -0,0 +1,53 @@
+runner: standard
+debug: false
+seed: 42
+output_dir: examples/carl_generalization_example
+wandb_project: null
+tensorboard_file: null
+experiment_name: carl_cartpole_generalization_mode_a/seed
+algorithm_kwargs:
+  rescale_action: false
+  tanh_squash: false
+  rollout_buffer_class:
+    _target_: mighty.mighty_replay.MightyRolloutBuffer
+  rollout_buffer_kwargs:
+    buffer_size: 2048
+    gamma: 0.99
+    gae_lambda: 0.95
+    obs_shape: ???
+    act_dim: ???
+    n_envs: ???
+    discrete_action: ???
+  learning_rate: 0.0003
+  batch_size: 2048
+  gamma: 0.99
+  ppo_clip: 0.2
+  value_loss_coef: 0.5
+  entropy_coef: 0.01
+  max_grad_norm: 0.5
+  hidden_sizes:
+  - 256
+  - 256
+  activation: tanh
+  n_gradient_steps: 1
+  n_epochs: 10
+  minibatch_size: 128
+  kl_target: null
+  use_value_clip: true
+  policy_class: mighty.mighty_exploration.StochasticPolicy
+  policy_kwargs:
+    entropy_coefficient: 0.0
+eval_every_n_steps: 5000.0
+n_episodes_eval: 10
+checkpoint: null
+save_model_every_n_steps: 1000
+cluster: {}
+algorithm: PPO
+num_steps: 250000
+env: CARLCartPole
+env_kwargs:
+  load_contexts: examples/carl_generalization_example/context_sets/train_contexts_a.json
+  load_eval_contexts: examples/carl_generalization_example/context_sets/eval_contexts_a.json
+env_wrappers:
+- mighty.mighty_utils.wrappers.FlattenVecObs
+num_envs: 64
diff --git a/...carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/hydra.yaml b/...carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/hydra.yaml
@@ -0,0 +1,189 @@
+hydra:
+  run:
+    dir: ${output_dir}/${experiment_name}_${seed}
+  sweep:
+    dir: ${output_dir}/${experiment_name}_${seed}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: Mighty-DACs
+    header: '== ${hydra.help.app_name} ==
+
+      The Mighty cRL library you''ve been looking for!'
+    footer: 'Powered by Hydra (https://hydra.cc)
+
+      Use --hydra-help to view Hydra specific help'
+    template: "${hydra.help.header}\n\n== Configuration groups ==\nCompose your configuration\
+      \ from those algorithms (algorithm=dqn)\n\n$APP_CONFIG_GROUPS\n\n== Common Hyperparameters\
+      \ ==\n* debug:              flag to toggle debug output (default: false)\n*\
+      \ seed:               Which seed to use (default: 0)\n* output_dir:        \
+      \ Where to store result data (default: /tmp)\n                      hydra specific\
+      \ information will be in \"output_dir/year-month-day/timestamp/.hydra\"\n\n\
+      * wandb_project:      For wandb integration (default: null)\n* tensorboard_file:\
+      \   For tensorboard integration (default: null)\n* experiment_name:    The folder\
+      \ in which the specific experiment data is to be stored.\n                 \
+      \     I.e. the path will be \"output_dir/experiment_name\"\n\n* algorithm_kwargs:\
+      \   A dictionary to specify hyperparameter settings to the algorithms.\n   \
+      \                   Will be overwritten/populated with the choice of algorithm.\n\
+      * num_steps:          Maximum number of steps in the environment before episode\
+      \ ends. (default: 1000000)\n* env:                The environment string name\
+      \ to use, e.g., MountainCarContinuous (default: CartPole-v1)\n             \
+      \         For gym environments please see https://www.gymlibrary.ml/ (simple\
+      \ control environments are by\n                      default supported)\n  \
+      \                    For DACBench environments please see https://github.com/automl/DACBench\n\
+      \                      For CARL environments please see https://github.com/automl/CARL\n\
+      * env_kwargs:         Dict to modify environment parameters. Note: Currently\
+      \ only supported for CARL envs\n* env_wrappers:       List of wrapper classes\
+      \ to apply to the environment. (default: [])\n\n* eval_every_n_steps: Training\
+      \ steps interval after which the agent is evaluated on a separate eval_env,\
+      \ i.e., a \n                      second copy of the training env (default:\
+      \ 1000)\n* n_episodes_eval:    Training episodes interval after which the agent\
+      \ is evaluated on a separate eval_env, i.e., a \n                      second\
+      \ copy of the training environment (default: null)\n* checkpoint:         Path\
+      \ to load a checkpointed model from. This allows you to continue training. If unset\
+      \ a new model is\n                      trained from scratch (default: null)\n\
+      \n== Config ==\nAny key=value argument can be overridden (use dots for.nested=overrides),\
+      \ for example:\npython mighty/run_mighty.py 'algorithm=ppo' 'env=MountainCarContinuous'\
+      \ 'num_steps=1000' 'algorithm_kwargs.learning_rate=0.1'\nor\npython mighty/run_mighty.py\
+      \ 'algorithm=dqn' 'env=SigmoidBenchmark' 'num_steps=100000'\n\nThis is the configuration\
+      \ that was generated for this run:\n-------\n$CONFIG\n-------\n\n${hydra.help.footer}"
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+
+      See https://hydra.cc for more info.
+
+
+      == Flags ==
+
+      $FLAGS_HELP
+
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+
+
+      $HYDRA_CONFIG_GROUPS
+
+
+      Use ''--cfg hydra'' to Show the Hydra config.
+
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      colorlog:
+        (): colorlog.ColoredFormatter
+        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: colorlog
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+      colorlog:
+        (): colorlog.ColoredFormatter
+        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
+          - %(message)s'
+        log_colors:
+          DEBUG: purple
+          INFO: green
+          WARNING: yellow
+          ERROR: red
+          CRITICAL: red
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: colorlog
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task:
+    - algorithm=ppo
+    - env=CARLCartPole
+    - seed=42
+    - num_steps=250000
+    - env_wrappers=[mighty.mighty_utils.wrappers.FlattenVecObs]
+    - algorithm_kwargs.rollout_buffer_kwargs.buffer_size=2048
+    - +env_kwargs.load_contexts=examples/carl_generalization_example/context_sets/train_contexts_a.json
+    - +env_kwargs.load_eval_contexts=examples/carl_generalization_example/context_sets/eval_contexts_a.json
+    - output_dir=examples/carl_generalization_example
+    - experiment_name=carl_cartpole_generalization_mode_a/seed
+  job:
+    name: run_mighty
+    chdir: null
+    override_dirname: +env_kwargs.load_contexts=examples/carl_generalization_example/context_sets/train_contexts_a.json,+env_kwargs.load_eval_contexts=examples/carl_generalization_example/context_sets/eval_contexts_a.json,algorithm=ppo,algorithm_kwargs.rollout_buffer_kwargs.buffer_size=2048,env=CARLCartPole,env_wrappers=[mighty.mighty_utils.wrappers.FlattenVecObs],experiment_name=carl_cartpole_generalization_mode_a/seed,num_steps=250000,output_dir=examples/carl_generalization_example,seed=42
+    id: ???
+    num: ???
+    config_name: base
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.3'
+    cwd: /Users/theeimer/Documents/git/Mighty-DACS
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /Users/theeimer/Documents/git/Mighty-DACS/mighty/configs
+      schema: file
+      provider: main
+    - path: hydra_plugins.hydra_colorlog.conf
+      schema: pkg
+      provider: hydra-colorlog
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /Users/theeimer/Documents/git/Mighty-DACS/examples/carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42
+    choices:
+      environment: pufferlib_ocean/bandit
+      algorithm: ppo
+      cluster: local
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: colorlog
+      hydra/hydra_logging: colorlog
+      hydra/hydra_help: default
+      hydra/help: mighty_help
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false
diff --git a/..._generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/overrides.yaml b/..._generalization_example/carl_cartpole_generalization_mode_a/seed_42/.hydra/overrides.yaml
@@ -0,0 +1,10 @@
+- algorithm=ppo
+- env=CARLCartPole
+- seed=42
+- num_steps=250000
+- env_wrappers=[mighty.mighty_utils.wrappers.FlattenVecObs]
+- algorithm_kwargs.rollout_buffer_kwargs.buffer_size=2048
+- +env_kwargs.load_contexts=examples/carl_generalization_example/context_sets/train_contexts_a.json
+- +env_kwargs.load_eval_contexts=examples/carl_generalization_example/context_sets/eval_contexts_a.json
+- output_dir=examples/carl_generalization_example
+- experiment_name=carl_cartpole_generalization_mode_a/seed
diff --git a/...ation_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/optimizer.pt b/...ation_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/optimizer.pt
diff --git a/...ion_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/policy_head.pt b/...ion_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/policy_head.pt
diff --git a/...tion_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/value_head.pt b/...tion_example/carl_cartpole_generalization_mode_a/seed_42/checkpoints/250000/value_head.pt
diff --git a/.../carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/eval_results.csv b/.../carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/eval_results.csv
@@ -0,0 +1,51 @@
+,step,seed,eval_episodes,mean_eval_step_reward,mean_eval_reward,instances
+0,5000,42,[1.],[1.],14.0,1
+1,10000,42,[1.],[1.],17.0,2
+2,15000,42,[1.],[1.],22.0,3
+3,20000,42,[1.],[1.],16.0,4
+4,25000,42,[1.],[1.],10.0,5
+5,30000,42,[1.],[1.],16.0,6
+6,35000,42,[1.],[1.],18.0,7
+7,40000,42,[1.],[1.],16.0,8
+8,45000,42,[1.],[1.],11.0,9
+9,50000,42,[1.],[1.],22.0,10
+10,55000,42,[1.],[1.],18.0,11
+11,60000,42,[1.],[1.],12.0,12
+12,65000,42,[1.],[1.],11.0,13
+13,70000,42,[1.],[1.],15.0,14
+14,75000,42,[1.],[1.],26.0,15
+15,80000,42,[1.],[1.],11.0,16
+16,85000,42,[1.],[1.],19.0,17
+17,90000,42,[1.],[1.],58.0,18
+18,95000,42,[1.],[1.],151.0,19
+19,100000,42,[1.],[1.],122.0,20
+20,105000,42,[1.],[1.],109.0,21
+21,110000,42,[1.],[1.],128.0,22
+22,115000,42,[1.],[1.],493.0,23
+23,120000,42,[1.],[1.],194.0,24
+24,125000,42,[1.],[1.],500.0,25
+25,130000,42,[1.],[1.],236.0,26
+26,135000,42,[1.],[1.],367.0,27
+27,140000,42,[1.],[1.],300.0,28
+28,145000,42,[1.],[1.],328.0,29
+29,150000,42,[1.],[1.],86.0,30
+30,155000,42,[1.],[1.],108.0,31
+31,160000,42,[1.],[1.],500.0,32
+32,165000,42,[1.],[1.],100.0,33
+33,170000,42,[1.],[1.],23.0,34
+34,175000,42,[1.],[1.],79.0,35
+35,180000,42,[1.],[1.],500.0,36
+36,185000,42,[1.],[1.],88.0,37
+37,190000,42,[1.],[1.],51.0,38
+38,195000,42,[1.],[1.],39.0,39
+39,200000,42,[1.],[1.],169.0,40
+40,205000,42,[1.],[1.],165.0,41
+41,210000,42,[1.],[1.],50.0,42
+42,215000,42,[1.],[1.],226.0,43
+43,220000,42,[1.],[1.],500.0,44
+44,225000,42,[1.],[1.],79.0,45
+45,230000,42,[1.],[1.],63.0,46
+46,235000,42,[1.],[1.],123.0,47
+47,240000,42,[1.],[1.],243.0,48
+48,245000,42,[1.],[1.],444.0,49
+49,250000,42,[1.],[1.],500.0,50
diff --git a/...rl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/hyperparameters.csv b/...rl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/hyperparameters.csv
@@ -0,0 +1,2 @@
+,step,hp/lr,hp/pi_epsilon,hp/batch_size,hp/learning_starts,meta_modules
+0,0,0.0003,0.1,2048,1,[]
diff --git a/...carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/instance_set.json b/...carl_generalization_example/carl_cartpole_generalization_mode_a/seed_42/instance_set.json
@@ -0,0 +1 @@
+{"0": {"gravity": 8.1236203565421, "masscart": 1.0, "masspole": 0.1, "length": 0.3623978081345, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "1": {"gravity": 9.8521429192297, "masscart": 1.0, "masspole": 0.1, "length": 0.3232334448673, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "2": {"gravity": 9.1959818254342, "masscart": 1.0, "masspole": 0.1, "length": 0.64647045831, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "3": {"gravity": 8.7959754525911, "masscart": 1.0, "masspole": 0.1, "length": 0.5404460046973, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "4": {"gravity": 7.4680559213273, "masscart": 1.0, "masspole": 0.1, "length": 0.5832290311184, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "5": {"gravity": 7.0617534828874, "masscart": 1.0, "masspole": 0.1, "length": 0.3733618039414, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "6": {"gravity": 9.909729556486, "masscart": 1.0, "masspole": 0.1, "length": 0.4216968971838, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "7": {"gravity": 9.4973279224013, "masscart": 1.0, "masspole": 0.1, "length": 0.5099025726529, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "8": {"gravity": 7.6370173320348, "masscart": 1.0, "masspole": 0.1, "length": 0.4727780074568, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "9": {"gravity": 7.5454749016213, "masscart": 1.0, "masspole": 0.1, "length": 0.4164916560792, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		,step,hp/lr,hp/pi_epsilon,hp/batch_size,hp/learning_starts,meta_modules
		0,0,0.0003,0.1,2048,1,[]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"0": {"gravity": 8.1236203565421, "masscart": 1.0, "masspole": 0.1, "length": 0.3623978081345, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "1": {"gravity": 9.8521429192297, "masscart": 1.0, "masspole": 0.1, "length": 0.3232334448673, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "2": {"gravity": 9.1959818254342, "masscart": 1.0, "masspole": 0.1, "length": 0.64647045831, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "3": {"gravity": 8.7959754525911, "masscart": 1.0, "masspole": 0.1, "length": 0.5404460046973, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "4": {"gravity": 7.4680559213273, "masscart": 1.0, "masspole": 0.1, "length": 0.5832290311184, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "5": {"gravity": 7.0617534828874, "masscart": 1.0, "masspole": 0.1, "length": 0.3733618039414, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "6": {"gravity": 9.909729556486, "masscart": 1.0, "masspole": 0.1, "length": 0.4216968971838, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "7": {"gravity": 9.4973279224013, "masscart": 1.0, "masspole": 0.1, "length": 0.5099025726529, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "8": {"gravity": 7.6370173320348, "masscart": 1.0, "masspole": 0.1, "length": 0.4727780074568, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}, "9": {"gravity": 7.5454749016213, "masscart": 1.0, "masspole": 0.1, "length": 0.4164916560792, "force_mag": 10.0, "tau": 0.02, "initial_state_lower": -0.1, "initial_state_upper": 0.1}}