diff --git a/hackatari/core.py b/hackatari/core.py index 68da904..8457cd0 100644 --- a/hackatari/core.py +++ b/hackatari/core.py @@ -82,8 +82,9 @@ def __init__( self.ale = self.env.unwrapped.ale # Initialize modifications and environment settings self.step_modifs, self.reset_modifs, self.post_detection_modifs = [], [], [] + self.lives = self.ale.lives() self.inpainting_modifs, self.place_above_modifs = [], [] - + # Load modification functions dynamically try: modif_module = importlib.import_module( @@ -94,8 +95,14 @@ def __init__( self.inpainting_modifs.extend(inpainting_modifs) self.place_above_modifs.extend(place_above_modifs) except: - step_modifs, reset_modifs, post_detection_modifs = modif_module.modif_funcs( - self, modifs) + step_modifs, reset_modifs, post_detection_modifs = modif_module.modif_funcs(self, modifs) + active_modifs = [m.__name__ for m in step_modifs + reset_modifs + post_detection_modifs] + if len(active_modifs) < len(modifs): + print( + colored( + f"Warning: Some modifications not found in {self.game_name}, requested: {modifs}, found: {active_modifs}" + ) + ) self.step_modifs.extend(step_modifs) self.reset_modifs.extend(reset_modifs) @@ -205,6 +212,12 @@ def step(self, *args, **kwargs): func() obs, reward, terminated, truncated, info = self._env.step( *args, **kwargs) + + lives = self.ale.lives() + if 0 < lives < self.lives: + for func in self.reset_modifs: + func() + self.lives = lives for func in self.step_modifs: func() total_reward += float(reward) diff --git a/hackatari/games/breakout.py b/hackatari/games/breakout.py index ca31dca..821c64e 100644 --- a/hackatari/games/breakout.py +++ b/hackatari/games/breakout.py @@ -1,5 +1,7 @@ import random +import numpy as np + class GameModifications: """ @@ -17,9 +19,11 @@ def __init__(self, env): self.strength = 2 self.timer = 0 self.colors = [0, 12, 48, 113, 200] + self.colors_bricks = [134, 198, 22, 38, 54, 70] self.player_and_ball_color = 0 # Black, White, Red, Blue, Green self.all_blocks_color = 
0 # Black, White, Red, Blue, Green self.row_colors = [None] * 6 + self.already_reset = False def right_drift(self): """ @@ -90,25 +94,63 @@ def color_player_and_ball_blue(self): def color_player_and_ball_green(self): self.env.set_ram(62, self.colors[4]) - def color_all_blocks_black(self): + def strobo_mode_player_and_ball(self): + color = random.randint(0, 255) + self.env.set_ram(62, color) + + def strobo_mode_player_and_ball_no_black(self): + color = random.randint(1, 255) + self.env.set_ram(62, color) + + def color_blocks(self, color): for i in range(64, 70): - self.env.set_ram(i, self.colors[0]) + self.env.set_ram(i, color) + + def color_all_blocks_black(self): + self.color_blocks(self.colors[0]) def color_all_blocks_white(self): - for i in range(64, 70): - self.env.set_ram(i, self.colors[1]) + self.color_blocks(self.colors[1]) def color_all_blocks_red(self): - for i in range(64, 70): - self.env.set_ram(i, self.colors[2]) + self.color_blocks(self.colors[2]) def color_all_blocks_blue(self): - for i in range(64, 70): - self.env.set_ram(i, self.colors[3]) + self.color_blocks(self.colors[3]) def color_all_blocks_green(self): + self.color_blocks(self.colors[4]) + + def strobo_mode_blocks(self): for i in range(64, 70): - self.env.set_ram(i, self.colors[4]) + color = random.randint(0, 255) + self.env.set_ram(i, color) + + def strobo_mode_blocks_no_black(self): + for i in range(64, 70): + color = random.randint(1, 255) + self.env.set_ram(i, color) + + def sample_new_player_and_ball_color(self): + if not self.already_reset: + color = random.choice(self.colors_bricks) + self.player_and_ball_color = color + self.already_reset = True + self.env.set_ram(62, self.player_and_ball_color) + + def sample_new_brick_colors(self): + if not self.already_reset: + colors = self.colors_bricks.copy() + np.random.shuffle(colors) + for idx, i in enumerate(range(64, 70)): + self.row_colors[idx] = colors[idx] + self.already_reset = True + + for idx, i in enumerate(range(64, 70)): + 
self.env.set_ram(i, self.row_colors[idx]) + + def reset(self): + self.already_reset = False def _set_active_modifications(self, active_modifs): """ @@ -140,6 +182,12 @@ def _fill_modif_lists(self): "color_all_blocks_red": self.color_all_blocks_red, "color_all_blocks_blue": self.color_all_blocks_blue, "color_all_blocks_green": self.color_all_blocks_green, + "strobo_mode_blocks": self.strobo_mode_blocks, + "strobo_mode_player_and_ball": self.strobo_mode_player_and_ball, + "strobo_mode_blocks_no_black": self.strobo_mode_blocks_no_black, + "strobo_mode_player_and_ball_no_black": self.strobo_mode_player_and_ball_no_black, + "sample_new_player_and_ball_color": self.sample_new_player_and_ball_color, + "sample_new_brick_colors": self.sample_new_brick_colors, }, "reset_modifs": { }, @@ -150,11 +198,11 @@ def _fill_modif_lists(self): "place_above_modifs": { } } - step_modifs = [modif_mapping["step_modifs"][name] for name in self.active_modifications if name in modif_mapping["step_modifs"]] reset_modifs = [modif_mapping["reset_modifs"][name] for name in self.active_modifications if name in modif_mapping["reset_modifs"]] + reset_modifs += [self.reset] post_detection_modifs = [modif_mapping["post_detection_modifs"][name] for name in self.active_modifications if name in modif_mapping["post_detection_modifs"]] inpainting_modifs = [modif_mapping["inpainting_modifs"][name] diff --git a/hackatari/games/freeway.py b/hackatari/games/freeway.py index 0d47e0a..ef1b066 100644 --- a/hackatari/games/freeway.py +++ b/hackatari/games/freeway.py @@ -16,6 +16,11 @@ def __init__(self, env): """ self.env = env self.active_modifications = set() + self.order_cars = list(range(10)) + + + def reset(self): + self.order_cars = [random.randint(0, 10) for _ in range(10)] def stop_random_car(self): """ @@ -50,6 +55,12 @@ def reverse_car_speed_top(self): val = self.env.get_ram()[1] % (i+1) self.env.set_ram(42-i, val) + def vary_car_speeds(self): + for i,j in enumerate(range(33, 43)): + car = 
self.order_cars[i] + val = self.env.get_ram()[1] % (car+1) + self.env.set_ram(j, val) + def stop_all_cars(self): """ Stops all cars and repositions some to predefined positions. @@ -61,6 +72,17 @@ def stop_all_cars(self): for new_pos_down in range(113, 118): self.env.set_ram(new_pos_down, 55) + def disable_cars(self): + """ + Disables all cars by stopping them and setting their positions to out of frame. + """ + for car in range(33, 43): + self.env.set_ram(car, 100) + for new_pos_down in range(108, 113): + self.env.set_ram(new_pos_down, 3) + for new_pos_down in range(113, 118): + self.env.set_ram(new_pos_down, 3) + def all_black_cars(self): """ Colors all cars black. @@ -95,6 +117,13 @@ def all_blue_cars(self): """ for car in range(77, 87): self.env.set_ram(car, 145) + + def all_pink_cars(self): + """ + Colors all cars pink. + """ + for car in range(77, 87): + self.env.set_ram(car, 90) # My modifications @@ -142,7 +171,7 @@ def speed_mode(self): """ Each car drives with speed 2 (default) """ - speed = 2 # default + speed = 2 # default ram = self.env.get_ram() for car_x in range(108, 113): x_value = ram[car_x] @@ -177,6 +206,7 @@ def _fill_modif_lists(self): "all_white_cars": self.all_white_cars, "all_red_cars": self.all_red_cars, "all_green_cars": self.all_green_cars, + "all_pink_cars": self.all_pink_cars, "all_blue_cars": self.all_blue_cars, "invisible_mode": self.invisible_mode, "strobo_mode": self.strobo_mode, @@ -185,6 +215,8 @@ def _fill_modif_lists(self): "speed_mode": self.speed_mode, "reverse_car_speed_bottom": self.reverse_car_speed_bottom, "reverse_car_speed_top": self.reverse_car_speed_top, + "disable_cars": self.disable_cars, + "vary_car_speeds": self.vary_car_speeds, }, "reset_modifs": { }, @@ -200,6 +232,7 @@ def _fill_modif_lists(self): for name in self.active_modifications if name in modif_mapping["step_modifs"]] reset_modifs = [modif_mapping["reset_modifs"][name] for name in self.active_modifications if name in modif_mapping["reset_modifs"]] + 
reset_modifs += [self.reset] post_detection_modifs = [modif_mapping["post_detection_modifs"][name] for name in self.active_modifications if name in modif_mapping["post_detection_modifs"]] inpainting_modifs = [modif_mapping["inpainting_modifs"][name] diff --git a/hackatari/games/frostbite.py b/hackatari/games/frostbite.py index c17fbc7..40ced75 100644 --- a/hackatari/games/frostbite.py +++ b/hackatari/games/frostbite.py @@ -125,6 +125,17 @@ def igloo7(self): """ self.env.set_ram(77, 7) + def ice_inactive(self): + """ + Sets the ice floes to inactive. This means while the player can still + walk on them, they will not give points or contribute to the igloo + building progress. + """ + self.env.set_ram(43, 152) + self.env.set_ram(44, 152) + self.env.set_ram(45, 152) + self.env.set_ram(46, 152) + def _set_active_modifications(self, active_modifs): """ Specifies which modifications are active. @@ -154,6 +165,7 @@ def _fill_modif_lists(self): "no_birds": self.no_birds, "few_enemies": self.few_enemies, "many_enemies": self.many_enemies, + "ice_inactive": self.ice_inactive, }, "reset_modifs": { "full_igloo": self.full_igloo, diff --git a/hackatari/games/pong.py b/hackatari/games/pong.py index 9eaa9f9..1e48beb 100644 --- a/hackatari/games/pong.py +++ b/hackatari/games/pong.py @@ -1,3 +1,4 @@ +import numpy as np from ocatari.ram.game_objects import NoObject @@ -17,7 +18,29 @@ def __init__(self, env): self.strength = 6 self.timer = 0 self.last_enemy_y_pos = 127 + self.last_player_y_pos = 127 self.ball_previous_x_pos = 130 + self.ball_previous_y_pos = 60 + + def parallel_enemy(self): + ram = self.env.get_ram() + player_pos = ram[60] + self.env.set_ram(21, player_pos) + + def parallel_enemy_after_hit(self): + ram = self.env.get_ram() + if 0 < ram[11] < 5: + self.env.set_ram(21, 127) + self.env.set_ram(49, 130) + if self.ball_previous_x_pos < ram[49]: + player_pos = ram[60] + self.env.set_ram(21, player_pos) + + def random_perturbation_enemy(self): + ram = self.env.get_ram() + 
enemy_pos = ram[21] + perturbation = np.random.randint(-5, 6) + self.env.set_ram(21, min(max(0,perturbation+enemy_pos),250)) def lazy_enemy(self): """ @@ -35,6 +58,7 @@ def lazy_enemy(self): self.ball_previous_x_pos = ram[49] self.last_enemy_y_pos = tmp + def hidden_enemy(self): """ Enemy does not move after returning the shot. @@ -106,6 +130,9 @@ def _fill_modif_lists(self): "down_drift": self.down_drift, "left_drift": self.left_drift, "right_drift": self.right_drift, + "parallel_enemy_complete": self.parallel_enemy, + "parallel_enemy": self.parallel_enemy_after_hit, + "random_perturbation_enemy": self.random_perturbation_enemy, }, "reset_modifs": { }, @@ -116,6 +143,9 @@ def _fill_modif_lists(self): "down_drift": self.down_drift, "left_drift": self.left_drift, "right_drift": self.right_drift, + "parallel_enemy_complete": self.parallel_enemy, + "parallel_enemy": self.parallel_enemy_after_hit, + "random_perturbation_enemy": self.random_perturbation_enemy, }, "inpainting_modifs": { }, diff --git a/modification_list.md b/modification_list.md index 37e29c0..f864dc9 100644 --- a/modification_list.md +++ b/modification_list.md @@ -81,6 +81,10 @@ The modes are: | inverse_gravity | Set drift dirtection upwards | | color_player_and_ball_black | Set color of player and ball (also works with white, red, blue and green) | | color_all_blocks_black | Set color of all blocks (also works with white, red, blue and green) | +| strobo_mode_player_and_ball | Player and ball change color randomly every step (also available with the random colors not including black). | +| strobo_mode_blocks | All blocks change color randomly every step (also available with the random colors not including black). | +| sample_new_player_and_ball_color | Sample a new color for the player and ball for every game. | +| sample_new_brick_colors | Sample a new color for the blocks for every game. 
| ## Carnival: | Command | Effect | @@ -133,7 +137,10 @@ The modes are: | stop_random_car | Stops a random car with a biased probability for a certain time. | | stop_all_cars | Stops all cars on the side of the board. | | align_all_cars | Align all cars so they move in a line. | -| all_black_cars | All cars are black. (also works with white, red, blue and green)| +| all_black_cars | All cars are black. (also works with white, red, blue, pink and green)| +| disable_cars | Hide and disable all cars. | +| vary_car_speeds | Randomly interchange car speeds. | + ## Frostbite: @@ -146,6 +153,8 @@ The modes are: | no_birds | Removes all birds (and fishes?) | | few_enemies | Increase enemies slightly | | many_enemies | Increase enemies to a maximum| +| ice_inactive | Ice floes will not give points or contribute to the igloo building progress. | + ## Kangaroo: @@ -181,6 +190,9 @@ The modes are: | lazy_enemy | Enemy does not move after returning the shot. | | up_drift | Makes the ball drift upwards. (Also works with down, left and right) | | hidden_enemy | Makes the enemy invisible for the player (does not work in dqn_default since it is a object detection modification) +| random_perturbation_enemy | Add random perturbations to enemy movement. | +| parallel_enemy | In the period where the ball is moving towards the player, the enemy moves in parallel with the player. | +| parallel_enemy_complete | The enemy always moves in parallel with the player. | ## Seaquest