diff --git a/codeclash/agents/player.py b/codeclash/agents/player.py index c6fd48f1..743064f0 100644 --- a/codeclash/agents/player.py +++ b/codeclash/agents/player.py @@ -35,6 +35,7 @@ def __init__( log_path=self.game_context.log_local / "players" / self.name / "player.log", emoji="👤", ) + self._branch_name = config.get("branch", f"{self.game_context.id}.{self.name}") self._metadata = { "name": self.name, "player_unique_id": self._player_unique_id, @@ -46,10 +47,6 @@ def __init__( "agent_stats": {}, # mapping round -> agent stats } - if branch := config.get("branch_init"): - self.logger.info(f"Checking out branch {branch}") - assert_zero_exit_code(self.environment.execute(f"git checkout {branch}"), logger=self.logger) - if self.push: self.logger.info("Will push agent gameplay as branch to remote repository after each round") token = os.getenv("GITHUB_TOKEN") @@ -61,6 +58,33 @@ def __init__( ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) + # Handle branch initialization + if branch_init := config.get("branch_init"): + # Fetch from remote first (handles branches pushed in previous tournaments) + # Then checkout - git will create tracking branch if needed + assert_zero_exit_code( + self.environment.execute(f"git fetch origin && git checkout {branch_init}"), + logger=self.logger, + ) + self.logger.info(f"Checked out initial branch {branch_init}") + + if self._branch_name != branch_init: + self.logger.info(f"Switching to branch {self._branch_name} for pushing changes") + # First fetch to see if the branch exists on remote + assert_zero_exit_code( + self.environment.execute("git fetch origin"), + logger=self.logger, + ) + # Try to checkout the branch - git will track remote if it exists there + checkout_result = self.environment.execute(f"git checkout {self._branch_name}") + if checkout_result.get("returncode", 0) != 0: + # Branch doesn't exist locally or remotely, create it + self.logger.info(f"Branch {self._branch_name} doesn't exist, creating it") + assert_zero_exit_code( + self.environment.execute(f"git checkout -b {self._branch_name}"), + logger=self.logger, + ) + # --- Main methods --- def pre_run_hook(self, *, new_round: int) -> None: @@ -104,7 +128,7 @@ def post_run_hook(self, *, round: int) -> None: if self.push: for cmd in [ - f"git push origin {self._branch_name}", + f"git push -u origin {self._branch_name}", "git push origin --tags", ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) @@ -155,11 +179,6 @@ def _tag_round(self, round: int) -> None: ) self._metadata["round_tags"][round] = tag - @property - def _branch_name(self) -> str: - """Get the branch name for the agent's codebase.""" - return f"{self.game_context.id}.{self.name}" - def _get_round_tag_name(self, round: int) -> str: """Get git tag name for the version of the codebase at the given round.""" return f"{self._player_unique_id}-round-{round}" diff --git a/codeclash/arenas/robotrumble/robotrumble.py b/codeclash/arenas/robotrumble/robotrumble.py index d5d831fb..46e25bd7 100644 --- a/codeclash/arenas/robotrumble/robotrumble.py +++ b/codeclash/arenas/robotrumble/robotrumble.py @@ -10,8 +10,8 @@ DEFAULT_SIMS = 100 MAP_EXT_TO_HEADER = { - "js": "function robot(state, unit) {", - "py": "def robot(state, unit):", + "js": ["function robot(state, unit) {"], + "py": ["def robot(state, unit):", "def robot(state: State, unit: Obj)"], } ROBOTRUMBLE_HIDDEN_EXEC = ".codeclash_exec" @@ -156,11 +156,13 @@ def validate_code(self, agent: Player) -> tuple[bool, str | None]: agent.environment.execute(f'echo "robot.{ext}" > {ROBOTRUMBLE_HIDDEN_EXEC}') # Check that the robot function is defined - header = MAP_EXT_TO_HEADER[ext] - if header not in agent.environment.execute(f"cat robot.{ext}")["output"]: + if not any( + [header in agent.environment.execute(f"cat robot.{ext}")["output"] for header in MAP_EXT_TO_HEADER[ext]] + ): + headers = "\n- ".join(MAP_EXT_TO_HEADER[ext]) return ( False, - f"robot.{ext} does not contain the required robot function. It should be defined as '{header}'.", + f"robot.{ext} does not contain the required robot function. It should be defined as one of: '{headers}'.", ) test_run_cmd = f"{self.run_cmd_round} robot.{ext} robot.{ext} -t 1" try: diff --git a/configs/ablations/ladder/corewar.yaml b/configs/ablations/ladder/corewar.yaml index 7e176893..a82a18c8 100644 --- a/configs/ablations/ladder/corewar.yaml +++ b/configs/ablations/ladder/corewar.yaml @@ -1,537 +1,550 @@ tournament: - rounds: 0 + rounds: 5 game: name: CoreWar sims_per_round: 2000 players: + agent: mini + name: claude-sonnet-4-5-20250929 + branch_init: human/pspace + config: + agent: !include mini/default.yaml + model: + model_name: '@anthropic/claude-sonnet-4-5-20250929' + model_class: portkey + model_kwargs: + temperature: 0.2 + max_tokens: 4096 + push: True +prompts: + game_description: |- + Core War ladder +ladder: - agent: dummy - branch_init: human/0stormbringer -- agent: dummy - branch_init: human/abomination -- agent: dummy - branch_init: human/aeka -- agent: dummy - branch_init: human/agony31 -- agent: dummy - branch_init: human/agony51 -- agent: dummy - branch_init: human/agonyii + branch_init: human/pspace - agent: dummy - branch_init: human/alladinscave + branch_init: human/validate - agent: dummy - branch_init: human/armadillo + branch_init: human/dwarf - agent: dummy - branch_init: human/armorya5 + branch_init: human/smoothnoodlemap - agent: dummy - branch_init: human/arrow + branch_init: human/smoothnoodlemap6 - agent: dummy - branch_init: human/artofcorewar + branch_init: human/returnofthelivingdead - agent: dummy - branch_init: human/azathoth + branch_init: human/notepaper - agent: dummy - branch_init: human/backstabber + branch_init: human/vagabond - agent: dummy - branch_init: human/barrage + branch_init: human/genocide - agent: dummy - branch_init: human/bayonet + branch_init: human/paratroopsv21 - agent: dummy - branch_init: human/behemot + branch_init: human/trinity - agent: dummy - branch_init: human/beholderseye17 + branch_init: human/precipice - agent: dummy - branch_init: human/bigitalshot + branch_init: human/hydra - agent: dummy - branch_init: human/bitethebullet + branch_init: human/flypaper30 - agent: dummy - branch_init: human/blacken + branch_init: human/gammapaper30 - agent: dummy - branch_init: human/blackknight + branch_init: human/flashpaper37 - agent: dummy - branch_init: human/blade + branch_init: human/flashpaper - agent: dummy - branch_init: human/blizzard + branch_init: human/fastfoodv21 - agent: dummy - branch_init: human/bloodlust + branch_init: human/twilightpitsv60 - agent: dummy - branch_init: human/blowrag + branch_init: human/0stormbringer - agent: dummy - branch_init: human/bluecandle + branch_init: human/backstabber - agent: dummy - branch_init: human/bluefunk + branch_init: human/impurge - agent: dummy - branch_init: human/bluefunk3 + branch_init: human/requestv20 - agent: dummy - branch_init: human/blur + branch_init: human/griffin2 - agent: dummy - branch_init: human/blur2 + branch_init: human/imprimis6 - agent: dummy - branch_init: human/blur88 + branch_init: human/killerinstinct - agent: dummy - branch_init: human/borg + branch_init: human/crimp - agent: dummy - branch_init: human/borgir + branch_init: human/crimp2 - agent: dummy - branch_init: human/boysarebackintown + branch_init: human/bscannersliveinvain - agent: dummy - branch_init: human/bpanamax + branch_init: human/charonv70 - agent: dummy - branch_init: human/bscannersliveinvain + branch_init: human/nomuckingabout - agent: dummy - branch_init: human/bulldozed + branch_init: human/leprechaun1b - agent: dummy - branch_init: human/burningmetal + branch_init: human/charonv81 - agent: dummy - branch_init: human/cannonade + branch_init: human/keystonet13 - agent: dummy - branch_init: human/capskeyisstuck + branch_init: human/rave - agent: dummy - branch_init: human/carmilla + branch_init: human/sphinxv28 - agent: dummy - branch_init: human/chainlockv02a + branch_init: human/hordesofmicrowarriors - agent: dummy - branch_init: human/chameleon + branch_init: human/ncdecoy - agent: dummy - branch_init: human/charonv70 + branch_init: human/agony31 - agent: dummy - branch_init: human/charonv81 + branch_init: human/medusasv7x - agent: dummy - branch_init: human/chimerav35 + branch_init: human/sj4a - agent: dummy - branch_init: human/cinammon + branch_init: human/capskeyisstuck - agent: dummy - branch_init: human/claw + branch_init: human/thermite10 - agent: dummy - branch_init: human/cloudburst + branch_init: human/ttti - agent: dummy - branch_init: human/combatra + branch_init: human/agony51 - agent: dummy - branch_init: human/crimp + branch_init: human/stasis - agent: dummy - branch_init: human/crimp2 + branch_init: human/leprechaunonspeed - agent: dummy - branch_init: human/curseoftheundead + branch_init: human/blur88 - agent: dummy - branch_init: human/damageincorporated + branch_init: human/winterwerewolf3 - agent: dummy - branch_init: human/danceoffallenangels + branch_init: human/lucky3 - agent: dummy - branch_init: human/dawn + branch_init: human/cannonade - agent: dummy - branch_init: human/dawn2 + branch_init: human/kitchensinkii - agent: dummy - branch_init: human/decoysignal + branch_init: human/seventyfive - agent: dummy - branch_init: human/defensive + branch_init: human/fatexpansionv - agent: dummy - branch_init: human/devilish202 + branch_init: human/irongate - agent: dummy - branch_init: human/devilstick + branch_init: human/snake - agent: dummy - branch_init: human/diehard + branch_init: human/leapfrog - agent: dummy - branch_init: human/digitalis2003 + branch_init: human/chimerav35 - agent: dummy - branch_init: human/discord + branch_init: human/leviathan - agent: dummy - branch_init: human/disharmonious + branch_init: human/pacman - agent: dummy - branch_init: human/dust07 + branch_init: human/heremscimitar - agent: dummy - branch_init: human/dwarf + branch_init: human/elementaldust2 - agent: dummy - branch_init: human/eccentric + branch_init: human/foggyswamp - agent: dummy - branch_init: human/electrichead + branch_init: human/armorya5 - agent: dummy - branch_init: human/electricrazor + branch_init: human/beholderseye17 - agent: dummy - branch_init: human/elementaldust2 + branch_init: human/phq - agent: dummy - branch_init: human/elvenking + branch_init: human/abomination - agent: dummy - branch_init: human/enigma + branch_init: human/steppingstone - agent: dummy - branch_init: human/eternalexile + branch_init: human/agonyii - agent: dummy - branch_init: human/evolcap66 + branch_init: human/twister - agent: dummy branch_init: human/evoltmp88 - agent: dummy - branch_init: human/excalibur + branch_init: human/gothik - agent: dummy - branch_init: human/falconv03 + branch_init: human/quiz - agent: dummy - branch_init: human/fastfoodv21 + branch_init: human/aeka - agent: dummy - branch_init: human/fatexpansionv + branch_init: human/vamp02b - agent: dummy - branch_init: human/fireandice + branch_init: human/replicant - agent: dummy - branch_init: human/firestorm + branch_init: human/gemoftheocean - agent: dummy - branch_init: human/fixed + branch_init: human/thermiteii - agent: dummy - branch_init: human/flashpaper + branch_init: human/blur - agent: dummy - branch_init: human/flashpaper37 + branch_init: human/blur2 - agent: dummy branch_init: human/flurry - agent: dummy - branch_init: human/flypaper30 + branch_init: human/mirage2 - agent: dummy - branch_init: human/foggyswamp + branch_init: human/nightofthelivingdead - agent: dummy - branch_init: human/forgottenlore + branch_init: human/mirage15 - agent: dummy - branch_init: human/forgottenlore2 + branch_init: human/soldieroffortune - agent: dummy - branch_init: human/forjohn + branch_init: human/win - agent: dummy - branch_init: human/freighttrain + branch_init: human/icedragon - agent: dummy - branch_init: human/frothandfizzle + branch_init: human/onebite - agent: dummy - branch_init: human/gammapaper30 + branch_init: human/myvamp37 - agent: dummy - branch_init: human/gargantuan + branch_init: human/bluefunk - agent: dummy - branch_init: human/gemoftheocean + branch_init: human/tornado30 - agent: dummy - branch_init: human/genocide + branch_init: human/mason20 - agent: dummy - branch_init: human/gigolo + branch_init: human/bayonet - agent: dummy - branch_init: human/goldeneye + branch_init: human/tnt - agent: dummy - branch_init: human/gothik + branch_init: human/zygote - agent: dummy - branch_init: human/gremlin + branch_init: human/stalker - agent: dummy - branch_init: human/grendelsrevenge + branch_init: human/julietandpaper - agent: dummy - branch_init: human/griffin2 + branch_init: human/thenextstep88 - agent: dummy - branch_init: human/grilledoctopus05 + branch_init: human/chameleon - agent: dummy - branch_init: human/halcyon + branch_init: human/stoninc - agent: dummy - branch_init: human/hazylazy + branch_init: human/claw - agent: dummy - branch_init: human/hazylazyc11 + branch_init: human/myvamp54 - agent: dummy - branch_init: human/hazyshadeii + branch_init: human/infiltrator - agent: dummy - branch_init: human/hector2 + branch_init: human/yogibear - agent: dummy - branch_init: human/hellfire + branch_init: human/grilledoctopus05 - agent: dummy - branch_init: human/herbalavenger + branch_init: human/intotheunknown - agent: dummy - branch_init: human/heremscimitar + branch_init: human/probe - agent: dummy - branch_init: human/hordesofmicrowarriors + branch_init: human/torcht18 - agent: dummy - branch_init: human/hullab3loo + branch_init: human/damageincorporated - agent: dummy - branch_init: human/hullabaloo + branch_init: human/lithium - agent: dummy - branch_init: human/hydra + branch_init: human/bluefunk3 - agent: dummy - branch_init: human/icedragon + branch_init: human/labomba - agent: dummy - branch_init: human/impfinityv4g1 + branch_init: human/sneakyb2 - agent: dummy - branch_init: human/impishv02 + branch_init: human/hazyshadeii - agent: dummy - branch_init: human/imprimis6 + branch_init: human/blizzard - agent: dummy - branch_init: human/impurge + branch_init: human/jackintheboxii - agent: dummy - branch_init: human/infiltrator + branch_init: human/recon2 - agent: dummy - branch_init: human/intotheunknown + branch_init: human/curseoftheundead - agent: dummy - branch_init: human/irongate + branch_init: human/bloodlust - agent: dummy - branch_init: human/ironicimps + branch_init: human/eternalexile - agent: dummy - branch_init: human/jackinthebox + branch_init: human/sprawlingchaos - agent: dummy - branch_init: human/jackintheboxii + branch_init: human/zooom - agent: dummy - branch_init: human/jade + branch_init: human/vampsareback02 - agent: dummy branch_init: human/jinx - agent: dummy - branch_init: human/julietandpaper + branch_init: human/pendulum - agent: dummy - branch_init: human/keystonet13 + branch_init: human/nosferatu - agent: dummy - branch_init: human/killerinstinct + branch_init: human/boysarebackintown - agent: dummy - branch_init: human/kitchensinkii + branch_init: human/discord - agent: dummy - branch_init: human/kosmos + branch_init: human/jackinthebox - agent: dummy - branch_init: human/kryptonite + branch_init: human/barrage +- agent: dummy + branch_init: human/perseus - agent: dummy branch_init: human/kusanagi - agent: dummy - branch_init: human/labomba + branch_init: human/simple88v2 - agent: dummy - branch_init: human/lastjudgement + branch_init: human/excalibur +- agent: dummy + branch_init: human/carmilla +- agent: dummy + branch_init: human/bpanamax - agent: dummy - branch_init: human/leapfrog + branch_init: human/oblivion - agent: dummy - branch_init: human/leprechaun1b + branch_init: human/hector2 - agent: dummy - branch_init: human/leprechaunonspeed + branch_init: human/xenosmilus - agent: dummy - branch_init: human/leviathan + branch_init: human/macromagic - agent: dummy - branch_init: human/lithium + branch_init: human/whitemist - agent: dummy - branch_init: human/lithobolia + branch_init: human/fireandice - agent: dummy - branch_init: human/luca + branch_init: human/grendelsrevenge - agent: dummy - branch_init: human/lucky3 + branch_init: human/herbalavenger - agent: dummy - branch_init: human/macromagic + branch_init: human/unpit - agent: dummy - branch_init: human/maelstrom + branch_init: human/dust07 - agent: dummy - branch_init: human/mandragora + branch_init: human/alladinscave - agent: dummy - branch_init: human/mascafe + branch_init: human/hazylazyc11 - agent: dummy - branch_init: human/mason20 + branch_init: human/bigitalshot - agent: dummy - branch_init: human/medusasv7x + branch_init: human/shottonothing - agent: dummy - branch_init: human/mercenary + branch_init: human/hazylazy - agent: dummy - branch_init: human/mirage15 + branch_init: human/valkyrie - agent: dummy - branch_init: human/mirage2 + branch_init: human/enigma - agent: dummy - branch_init: human/myvamp37 + branch_init: human/lithobolia - agent: dummy - branch_init: human/myvamp54 + branch_init: human/electrichead - agent: dummy - branch_init: human/ncdecoy + branch_init: human/arrow - agent: dummy - branch_init: human/neith + branch_init: human/blade - agent: dummy - branch_init: human/nemesis + branch_init: human/vanquisher - agent: dummy - branch_init: human/nightofthelivingdead + branch_init: human/unpitq - agent: dummy - branch_init: human/nightterrors + branch_init: human/sputnik - agent: dummy - branch_init: human/nighttrain + branch_init: human/forgottenlore - agent: dummy - branch_init: human/nomuckingabout + branch_init: human/returnofvanquisher - agent: dummy - branch_init: human/nosferatu + branch_init: human/behemot - agent: dummy - branch_init: human/notepaper + branch_init: human/impfinityv4g1 - agent: dummy - branch_init: human/npaperii + branch_init: human/torment - agent: dummy - branch_init: human/numb + branch_init: human/falconv03 - agent: dummy - branch_init: human/oblivion + branch_init: human/borg - agent: dummy - branch_init: human/olivia + branch_init: human/thehistorian - agent: dummy - branch_init: human/ompega + branch_init: human/nightterrors - agent: dummy - branch_init: human/onebite + branch_init: human/hellfire - agent: dummy - branch_init: human/pacman + branch_init: human/revivalfire - agent: dummy - branch_init: human/paratroopsv21 + branch_init: human/timescape10 - agent: dummy - branch_init: human/pdqscan + branch_init: human/forgottenlore2 - agent: dummy - branch_init: human/pendulum + branch_init: human/electricrazor - agent: dummy - branch_init: human/perseus + branch_init: human/freighttrain - agent: dummy - branch_init: human/phq + branch_init: human/digitalis2003 - agent: dummy - branch_init: human/precipice + branch_init: human/kryptonite - agent: dummy - branch_init: human/probe + branch_init: human/riseofthedragon - agent: dummy - branch_init: human/pspace + branch_init: human/bluecandle - agent: dummy - branch_init: human/quicksilver + branch_init: human/rosebud - agent: dummy - branch_init: human/quiz + branch_init: human/slimetest - agent: dummy - branch_init: human/rave + branch_init: human/quicksilver - agent: dummy - branch_init: human/recon2 + branch_init: human/stormkeeper - agent: dummy - branch_init: human/recycledbits + branch_init: human/ompega - agent: dummy - branch_init: human/reepicheep + branch_init: human/nemesis - agent: dummy - branch_init: human/replicant + branch_init: human/fixed - agent: dummy - branch_init: human/requestv20 + branch_init: human/evolcap66 - agent: dummy branch_init: human/retroq - agent: dummy - branch_init: human/returnofthefugitive + branch_init: human/devilish202 - agent: dummy - branch_init: human/returnofthejedimp + branch_init: human/sunset - agent: dummy - branch_init: human/returnofthelivingdead + branch_init: human/blacken - agent: dummy - branch_init: human/returnofthependragon + branch_init: human/nighttrain - agent: dummy - branch_init: human/returnofvanquisher + branch_init: human/diehard +- agent: dummy + branch_init: human/bulldozed - agent: dummy branch_init: human/revengeofthepapers - agent: dummy - branch_init: human/revivalfire + branch_init: human/uninvited - agent: dummy - branch_init: human/riseofthedragon + branch_init: human/disharmonious - agent: dummy - branch_init: human/rosebud + branch_init: human/bitethebullet - agent: dummy - branch_init: human/rust + branch_init: human/vain - agent: dummy - branch_init: human/safetyinnumbers + branch_init: human/luca - agent: dummy - branch_init: human/seventyfive + branch_init: human/jade - agent: dummy - branch_init: human/shottonothing + branch_init: human/recycledbits - agent: dummy - branch_init: human/silking + branch_init: human/spiritualblackdimension - agent: dummy - branch_init: human/silkworm + branch_init: human/themystery - agent: dummy - branch_init: human/simple88v2 + branch_init: human/unrequitedlove - agent: dummy - branch_init: human/simplicity + branch_init: human/borgir - agent: dummy - branch_init: human/sj4a + branch_init: human/gremlin - agent: dummy - branch_init: human/slimetest + branch_init: human/gigolo - agent: dummy - branch_init: human/smoothnoodlemap + branch_init: human/ironicimps - agent: dummy - branch_init: human/smoothnoodlemap6 + branch_init: human/stylizedeuphoria - agent: dummy - branch_init: human/snake + branch_init: human/ziggy - agent: dummy - branch_init: human/sneakyb2 + branch_init: human/impishv02 - agent: dummy - branch_init: human/snowscan + branch_init: human/thunderstrike - agent: dummy - branch_init: human/soldieroffortune + branch_init: human/eccentric - agent: dummy - branch_init: human/sonofvain + branch_init: human/hullabaloo - agent: dummy - branch_init: human/sphinxv28 + branch_init: human/safetyinnumbers - agent: dummy - branch_init: human/spiritualblackdimension + branch_init: human/mandragora - agent: dummy - branch_init: human/sprawlingchaos + branch_init: human/gargantuan - agent: dummy - branch_init: human/sputnik + branch_init: human/elvenking - agent: dummy - branch_init: human/stalker + branch_init: human/npaperii - agent: dummy - branch_init: human/stasis + branch_init: human/hullab3loo - agent: dummy - branch_init: human/steppingstone + branch_init: human/reepicheep - agent: dummy - branch_init: human/stoninc + branch_init: human/halcyon - agent: dummy - branch_init: human/stormkeeper + branch_init: human/olivia - agent: dummy - branch_init: human/stylizedeuphoria + branch_init: human/neith - agent: dummy - branch_init: human/sunset + branch_init: human/numb - agent: dummy - branch_init: human/thefugitive + branch_init: human/returnofthependragon - agent: dummy - branch_init: human/thehistorian + branch_init: human/cinammon - agent: dummy - branch_init: human/themystery + branch_init: human/combatra - agent: dummy - branch_init: human/thenextstep88 + branch_init: human/armadillo - agent: dummy - branch_init: human/thermite10 + branch_init: human/simplicity - agent: dummy - branch_init: human/thermiteii + branch_init: human/kosmos - agent: dummy - branch_init: human/thunderstrike + branch_init: human/azathoth - agent: dummy - branch_init: human/timescape10 + branch_init: human/danceoffallenangels - agent: dummy - branch_init: human/tnt + branch_init: human/returnofthejedimp - agent: dummy - branch_init: human/torcht18 + branch_init: human/blowrag - agent: dummy - branch_init: human/torment + branch_init: human/artofcorewar - agent: dummy - branch_init: human/tornado30 + branch_init: human/silking - agent: dummy - branch_init: human/toxic + branch_init: human/goldeneye - agent: dummy - branch_init: human/trinity + branch_init: human/dawn - agent: dummy - branch_init: human/ttti + branch_init: human/sonofvain - agent: dummy - branch_init: human/twilightpitsv60 + branch_init: human/blackknight - agent: dummy - branch_init: human/twister + branch_init: human/thefugitive - agent: dummy - branch_init: human/unheardof + branch_init: human/frothandfizzle - agent: dummy - branch_init: human/uninvited + branch_init: human/snowscan - agent: dummy - branch_init: human/unpit + branch_init: human/rust - agent: dummy - branch_init: human/unpitq + branch_init: human/lastjudgement - agent: dummy - branch_init: human/unrequitedlove + branch_init: human/pdqscan - agent: dummy - branch_init: human/vagabond + branch_init: human/mercenary - agent: dummy - branch_init: human/vain + branch_init: human/dawn2 - agent: dummy - branch_init: human/validate + branch_init: human/firestorm - agent: dummy - branch_init: human/valkyrie + branch_init: human/defensive - agent: dummy - branch_init: human/vamp02b + branch_init: human/burningmetal - agent: dummy - branch_init: human/vampsareback02 + branch_init: human/chainlockv02a - agent: dummy - branch_init: human/vanquisher + branch_init: human/decoysignal - agent: dummy - branch_init: human/whitemist + branch_init: human/cloudburst - agent: dummy - branch_init: human/win + branch_init: human/mascafe - agent: dummy - branch_init: human/winterwerewolf3 + branch_init: human/devilstick - agent: dummy - branch_init: human/xenosmilus + branch_init: human/unheardof - agent: dummy - branch_init: human/yogibear + branch_init: human/returnofthefugitive - agent: dummy - branch_init: human/ziggy + branch_init: human/silkworm - agent: dummy - branch_init: human/zooom + branch_init: human/maelstrom - agent: dummy - branch_init: human/zygote -prompts: - game_description: |- - Core War ladder + branch_init: human/forjohn +- agent: dummy + branch_init: human/toxic diff --git a/configs/ablations/ladder/make_corewar.yaml b/configs/ablations/ladder/make_corewar.yaml new file mode 100644 index 00000000..7e176893 --- /dev/null +++ b/configs/ablations/ladder/make_corewar.yaml @@ -0,0 +1,537 @@ +tournament: + rounds: 0 +game: + name: CoreWar + sims_per_round: 2000 +players: +- agent: dummy + branch_init: human/0stormbringer +- agent: dummy + branch_init: human/abomination +- agent: dummy + branch_init: human/aeka +- agent: dummy + branch_init: human/agony31 +- agent: dummy + branch_init: human/agony51 +- agent: dummy + branch_init: human/agonyii +- agent: dummy + branch_init: human/alladinscave +- agent: dummy + branch_init: human/armadillo +- agent: dummy + branch_init: human/armorya5 +- agent: dummy + branch_init: human/arrow +- agent: dummy + branch_init: human/artofcorewar +- agent: dummy + branch_init: human/azathoth +- agent: dummy + branch_init: human/backstabber +- agent: dummy + branch_init: human/barrage +- agent: dummy + branch_init: human/bayonet +- agent: dummy + branch_init: human/behemot +- agent: dummy + branch_init: human/beholderseye17 +- agent: dummy + branch_init: human/bigitalshot +- agent: dummy + branch_init: human/bitethebullet +- agent: dummy + branch_init: human/blacken +- agent: dummy + branch_init: human/blackknight +- agent: dummy + branch_init: human/blade +- agent: dummy + branch_init: human/blizzard +- agent: dummy + branch_init: human/bloodlust +- agent: dummy + branch_init: human/blowrag +- agent: dummy + branch_init: human/bluecandle +- agent: dummy + branch_init: human/bluefunk +- agent: dummy + branch_init: human/bluefunk3 +- agent: dummy + branch_init: human/blur +- agent: dummy + branch_init: human/blur2 +- agent: dummy + branch_init: human/blur88 +- agent: dummy + branch_init: human/borg +- agent: dummy + branch_init: human/borgir +- agent: dummy + branch_init: human/boysarebackintown +- agent: dummy + branch_init: human/bpanamax +- agent: dummy + branch_init: human/bscannersliveinvain +- agent: dummy + branch_init: human/bulldozed +- agent: dummy + branch_init: human/burningmetal +- agent: dummy + branch_init: human/cannonade +- agent: dummy + branch_init: human/capskeyisstuck +- agent: dummy + branch_init: human/carmilla +- agent: dummy + branch_init: human/chainlockv02a +- agent: dummy + branch_init: human/chameleon +- agent: dummy + branch_init: human/charonv70 +- agent: dummy + branch_init: human/charonv81 +- agent: dummy + branch_init: human/chimerav35 +- agent: dummy + branch_init: human/cinammon +- agent: dummy + branch_init: human/claw +- agent: dummy + branch_init: human/cloudburst +- agent: dummy + branch_init: human/combatra +- agent: dummy + branch_init: human/crimp +- agent: dummy + branch_init: human/crimp2 +- agent: dummy + branch_init: human/curseoftheundead +- agent: dummy + branch_init: human/damageincorporated +- agent: dummy + branch_init: human/danceoffallenangels +- agent: dummy + branch_init: human/dawn +- agent: dummy + branch_init: human/dawn2 +- agent: dummy + branch_init: human/decoysignal +- agent: dummy + branch_init: human/defensive +- agent: dummy + branch_init: human/devilish202 +- agent: dummy + branch_init: human/devilstick +- agent: dummy + branch_init: human/diehard +- agent: dummy + branch_init: human/digitalis2003 +- agent: dummy + branch_init: human/discord +- agent: dummy + branch_init: human/disharmonious +- agent: dummy + branch_init: human/dust07 +- agent: dummy + branch_init: human/dwarf +- agent: dummy + branch_init: human/eccentric +- agent: dummy + branch_init: human/electrichead +- agent: dummy + branch_init: human/electricrazor +- agent: dummy + branch_init: human/elementaldust2 +- agent: dummy + branch_init: human/elvenking +- agent: dummy + branch_init: human/enigma +- agent: dummy + branch_init: human/eternalexile +- agent: dummy + branch_init: human/evolcap66 +- agent: dummy + branch_init: human/evoltmp88 +- agent: dummy + branch_init: human/excalibur +- agent: dummy + branch_init: human/falconv03 +- agent: dummy + branch_init: human/fastfoodv21 +- agent: dummy + branch_init: human/fatexpansionv +- agent: dummy + branch_init: human/fireandice +- agent: dummy + branch_init: human/firestorm +- agent: dummy + branch_init: human/fixed +- agent: dummy + branch_init: human/flashpaper +- agent: dummy + branch_init: human/flashpaper37 +- agent: dummy + branch_init: human/flurry +- agent: dummy + branch_init: human/flypaper30 +- agent: dummy + branch_init: human/foggyswamp +- agent: dummy + branch_init: human/forgottenlore +- agent: dummy + branch_init: human/forgottenlore2 +- agent: dummy + branch_init: human/forjohn +- agent: dummy + branch_init: human/freighttrain +- agent: dummy + branch_init: human/frothandfizzle +- agent: dummy + branch_init: human/gammapaper30 +- agent: dummy + branch_init: human/gargantuan +- agent: dummy + branch_init: human/gemoftheocean +- agent: dummy + branch_init: human/genocide +- agent: dummy + branch_init: human/gigolo +- agent: dummy + branch_init: human/goldeneye +- agent: dummy + branch_init: human/gothik +- agent: dummy + branch_init: human/gremlin +- agent: dummy + branch_init: human/grendelsrevenge +- agent: dummy + branch_init: human/griffin2 +- agent: dummy + branch_init: human/grilledoctopus05 +- agent: dummy + branch_init: human/halcyon +- agent: dummy + branch_init: human/hazylazy +- agent: dummy + branch_init: human/hazylazyc11 +- agent: dummy + branch_init: human/hazyshadeii +- agent: dummy + branch_init: human/hector2 +- agent: dummy + branch_init: human/hellfire +- agent: dummy + branch_init: human/herbalavenger +- agent: dummy + branch_init: human/heremscimitar +- agent: dummy + branch_init: human/hordesofmicrowarriors +- agent: dummy + branch_init: human/hullab3loo +- agent: dummy + branch_init: human/hullabaloo +- agent: dummy + branch_init: human/hydra +- agent: dummy + branch_init: human/icedragon +- agent: dummy + branch_init: human/impfinityv4g1 +- agent: dummy + branch_init: human/impishv02 +- agent: dummy + branch_init: human/imprimis6 +- agent: dummy + branch_init: human/impurge +- agent: dummy + branch_init: human/infiltrator +- agent: dummy + branch_init: human/intotheunknown +- agent: dummy + branch_init: human/irongate +- agent: dummy + branch_init: human/ironicimps +- agent: dummy + branch_init: human/jackinthebox +- agent: dummy + branch_init: human/jackintheboxii +- agent: dummy + branch_init: human/jade +- agent: dummy + branch_init: human/jinx +- agent: dummy + branch_init: human/julietandpaper +- agent: dummy + branch_init: human/keystonet13 +- agent: dummy + branch_init: human/killerinstinct +- agent: dummy + branch_init: human/kitchensinkii +- agent: dummy + branch_init: human/kosmos +- agent: dummy + branch_init: human/kryptonite +- agent: dummy + branch_init: human/kusanagi +- agent: dummy + branch_init: human/labomba +- agent: dummy + branch_init: human/lastjudgement +- agent: dummy + branch_init: human/leapfrog +- agent: dummy + branch_init: human/leprechaun1b +- agent: dummy + branch_init: human/leprechaunonspeed +- agent: dummy + branch_init: human/leviathan +- agent: dummy + branch_init: human/lithium +- agent: dummy + branch_init: human/lithobolia +- agent: dummy + branch_init: human/luca +- agent: dummy + branch_init: human/lucky3 +- agent: dummy + branch_init: human/macromagic +- agent: dummy + branch_init: human/maelstrom +- agent: dummy + branch_init: human/mandragora +- agent: dummy + branch_init: human/mascafe +- agent: dummy + branch_init: human/mason20 +- agent: dummy + branch_init: human/medusasv7x +- agent: dummy + branch_init: human/mercenary +- agent: dummy + branch_init: human/mirage15 +- agent: dummy + branch_init: human/mirage2 +- agent: dummy + branch_init: human/myvamp37 +- agent: dummy + branch_init: human/myvamp54 +- agent: dummy + branch_init: human/ncdecoy +- agent: dummy + branch_init: human/neith +- agent: dummy + branch_init: human/nemesis +- agent: dummy + branch_init: human/nightofthelivingdead +- agent: dummy + branch_init: human/nightterrors +- agent: dummy + branch_init: human/nighttrain +- agent: dummy + branch_init: human/nomuckingabout +- agent: dummy + branch_init: human/nosferatu +- agent: dummy + branch_init: human/notepaper +- agent: dummy + branch_init: human/npaperii +- agent: dummy + branch_init: human/numb +- agent: dummy + branch_init: human/oblivion +- agent: dummy + branch_init: human/olivia +- agent: dummy + branch_init: human/ompega +- agent: dummy + branch_init: human/onebite +- agent: dummy + branch_init: human/pacman +- agent: dummy + branch_init: human/paratroopsv21 +- agent: dummy + branch_init: human/pdqscan +- agent: dummy + branch_init: human/pendulum +- agent: dummy + branch_init: human/perseus +- agent: dummy + branch_init: human/phq +- agent: dummy + branch_init: human/precipice +- agent: dummy + branch_init: human/probe +- agent: dummy + branch_init: human/pspace +- agent: dummy + branch_init: human/quicksilver +- agent: dummy + branch_init: human/quiz +- agent: dummy + branch_init: human/rave +- agent: dummy + branch_init: human/recon2 +- agent: dummy + branch_init: human/recycledbits +- agent: dummy + branch_init: human/reepicheep +- agent: dummy + branch_init: human/replicant +- agent: dummy + branch_init: human/requestv20 +- agent: dummy + branch_init: human/retroq +- agent: dummy + branch_init: human/returnofthefugitive +- agent: dummy + branch_init: human/returnofthejedimp +- agent: dummy + branch_init: human/returnofthelivingdead +- agent: dummy + branch_init: human/returnofthependragon +- agent: dummy + branch_init: human/returnofvanquisher +- agent: dummy + branch_init: human/revengeofthepapers +- agent: dummy + branch_init: human/revivalfire +- agent: dummy + branch_init: human/riseofthedragon +- agent: dummy + branch_init: human/rosebud +- agent: dummy + branch_init: human/rust +- agent: dummy + branch_init: human/safetyinnumbers +- agent: dummy + branch_init: human/seventyfive +- agent: dummy + branch_init: human/shottonothing +- agent: dummy + branch_init: human/silking +- agent: dummy + branch_init: human/silkworm +- agent: dummy + branch_init: human/simple88v2 +- agent: dummy + branch_init: human/simplicity +- agent: dummy + branch_init: human/sj4a +- agent: dummy + branch_init: human/slimetest +- agent: dummy + branch_init: human/smoothnoodlemap +- agent: dummy + branch_init: human/smoothnoodlemap6 +- agent: dummy + branch_init: human/snake +- agent: dummy + branch_init: human/sneakyb2 +- agent: dummy + branch_init: human/snowscan +- agent: dummy + branch_init: human/soldieroffortune +- agent: dummy + branch_init: human/sonofvain +- agent: dummy + branch_init: human/sphinxv28 +- agent: dummy + branch_init: human/spiritualblackdimension +- agent: dummy + branch_init: human/sprawlingchaos +- agent: dummy + branch_init: human/sputnik +- agent: dummy + branch_init: human/stalker +- agent: dummy + branch_init: human/stasis +- agent: dummy + branch_init: human/steppingstone +- agent: dummy + branch_init: human/stoninc +- agent: dummy + branch_init: human/stormkeeper +- agent: dummy + branch_init: human/stylizedeuphoria +- agent: dummy + branch_init: human/sunset +- agent: dummy + branch_init: human/thefugitive +- agent: dummy + branch_init: human/thehistorian +- agent: dummy + branch_init: human/themystery +- agent: dummy + branch_init: human/thenextstep88 +- agent: dummy + branch_init: human/thermite10 +- agent: dummy + branch_init: human/thermiteii +- agent: dummy + branch_init: human/thunderstrike +- agent: dummy + branch_init: human/timescape10 +- agent: dummy + branch_init: human/tnt +- agent: dummy + branch_init: human/torcht18 +- agent: dummy + branch_init: human/torment +- agent: dummy + branch_init: human/tornado30 +- agent: dummy + branch_init: human/toxic +- agent: dummy + branch_init: human/trinity +- agent: dummy + branch_init: human/ttti +- agent: dummy + branch_init: human/twilightpitsv60 +- agent: dummy + branch_init: human/twister +- agent: dummy + branch_init: human/unheardof +- agent: dummy + branch_init: human/uninvited +- agent: dummy + branch_init: human/unpit +- agent: dummy + branch_init: human/unpitq +- agent: dummy + branch_init: human/unrequitedlove +- agent: dummy + branch_init: human/vagabond +- agent: dummy + branch_init: human/vain +- agent: dummy + branch_init: human/validate +- agent: dummy + branch_init: human/valkyrie +- agent: dummy + branch_init: human/vamp02b +- agent: dummy + branch_init: human/vampsareback02 +- agent: dummy + branch_init: human/vanquisher +- agent: dummy + branch_init: human/whitemist +- agent: dummy + branch_init: human/win +- agent: dummy + branch_init: human/winterwerewolf3 +- agent: dummy + branch_init: human/xenosmilus +- agent: dummy + branch_init: human/yogibear +- agent: dummy + branch_init: human/ziggy +- agent: dummy + branch_init: human/zooom +- agent: dummy + branch_init: human/zygote +prompts: + game_description: |- + Core War ladder diff --git a/configs/ablations/ladder/make_robotrumble.yaml b/configs/ablations/ladder/make_robotrumble.yaml new file mode 100644 index 00000000..4ac12295 --- /dev/null +++ b/configs/ablations/ladder/make_robotrumble.yaml @@ -0,0 +1,127 @@ +tournament: + rounds: 0 +game: + name: RobotRumble + sims_per_round: 250 + args: + raw: false +players: +- agent: dummy + branch_init: human/aaa/jippty5 +- agent: dummy + branch_init: human/aaoutkine/dark-knight +- agent: dummy + branch_init: human/aaoutkine/school-bot +- agent: dummy + branch_init: human/aaoutkine/silo34 +- agent: dummy + branch_init: human/aayyad/testbot +- agent: dummy + branch_init: human/anton/anton3000 +- agent: dummy + branch_init: human/anton/anton4000 +- agent: dummy + branch_init: human/anton/om-om +- agent: dummy + branch_init: human/anton/wallifier +- agent: dummy + branch_init: human/atl15/centerrr +- agent: dummy + branch_init: human/clay/diag-lattice +- agent: dummy + branch_init: human/devchris/black_magic +- agent: dummy + branch_init: human/devchris/first_test +- agent: dummy + branch_init: human/edward/flail +- agent: dummy + branch_init: human/entropicdrifter/gigachad +- agent: dummy + branch_init: human/entropicdrifter/glommer +- agent: dummy + branch_init: human/entropicdrifter/glommerv2 +- agent: dummy + branch_init: human/entropicdrifter/seven-of-nine +- agent: dummy + branch_init: human/entropicdrifter/we-are-borg +- agent: dummy + branch_init: human/essickmango/fruity-test +- agent: dummy + branch_init: human/essickmango/pickle-up +- agent: dummy + branch_init: human/gerenuk/gere-ape +- agent: dummy + branch_init: human/happysquid/test +- agent: dummy + branch_init: human/jammyliu/sixty-nine-line +- agent: dummy + branch_init: human/jay0jayjay/naivestarter +- agent: dummy + branch_init: human/jiricodes/jiricodes-bot +- agent: dummy + branch_init: human/kalkin/artemis +- agent: dummy + branch_init: human/kalkin/artemis2 +- agent: dummy + branch_init: human/kalkin/maxad +- agent: dummy + branch_init: human/ketza/arthur +- agent: dummy + branch_init: human/ketza/bob +- agent: dummy + branch_init: human/lanity/sivuy +- agent: dummy + branch_init: human/ldang/nemo +- agent: dummy + branch_init: human/ldang/nessy +- agent: dummy + branch_init: human/luisa/baselinegere +- agent: dummy + branch_init: human/luisa/luisasrobot +- agent: dummy + branch_init: human/mario31313/alpha_13 +- agent: dummy + branch_init: human/mee42/follow-bot +- agent: dummy + branch_init: human/mitch84/crw_preempt +- agent: dummy + branch_init: human/mitch84/retreat_walk2 +- agent: dummy + branch_init: human/mitch84/walk_retreat +- agent: dummy + branch_init: human/mjburgess/rule99 +- agent: dummy + branch_init: human/mkap/test +- agent: dummy + branch_init: human/mountain/neuralbot1-1h +- agent: dummy + branch_init: human/mountain/neuralbot2-6h +- agent: dummy + branch_init: human/mountain/neuralbot4-3h +- agent: dummy + branch_init: human/mousetail/coward-bot +- agent: dummy + branch_init: human/mousetail/genetic-robot +- agent: dummy + branch_init: human/navster8/bash-brothers +- agent: dummy + branch_init: human/navster8/maginot-line +- agent: dummy + branch_init: human/sbasu3/meek-bot +- agent: dummy + branch_init: human/sivecano/clouded-mind +- agent: dummy + branch_init: human/suddenlyseals/control-center +- agent: dummy + branch_init: human/tabaxi3k/black-magic-1 +- agent: dummy + branch_init: human/tabaxi3k/charles +- agent: dummy + branch_init: human/thesmilingturtl/naivefaa +- agent: dummy + branch_init: human/underscore/bot1 +- agent: dummy + branch_init: human/wolfsleuth/simple +prompts: + game_description: |- + RobotRumble ladder diff --git a/configs/ablations/ladder/robotrumble.yaml b/configs/ablations/ladder/robotrumble.yaml index 4ac12295..936a811f 100644 --- a/configs/ablations/ladder/robotrumble.yaml +++ b/configs/ablations/ladder/robotrumble.yaml @@ -1,127 +1,140 @@ tournament: - rounds: 0 + rounds: 5 game: name: RobotRumble sims_per_round: 250 args: raw: false -players: -- agent: dummy - branch_init: human/aaa/jippty5 -- agent: dummy - branch_init: human/aaoutkine/dark-knight +player: + agent: mini + name: claude-sonnet-4-5-20250929 + branch_init: human/anton/anton3000 + config: + agent: !include mini/default.yaml + model: + model_name: '@anthropic/claude-sonnet-4-5-20250929' + model_class: portkey + model_kwargs: + temperature: 0.2 + max_tokens: 4096 + push: True +prompts: + game_description: |- + RobotRumble ladder +ladder: - agent: dummy - branch_init: human/aaoutkine/school-bot + branch_init: human/anton/anton3000 - agent: dummy - branch_init: human/aaoutkine/silo34 + branch_init: human/happysquid/test - agent: dummy - branch_init: human/aayyad/testbot + branch_init: human/anton/wallifier - agent: dummy - branch_init: human/anton/anton3000 + branch_init: human/ldang/nessy - agent: dummy - branch_init: human/anton/anton4000 + branch_init: human/ldang/nemo - agent: dummy - branch_init: human/anton/om-om + branch_init: human/navster8/bash-brothers - agent: dummy - branch_init: human/anton/wallifier + branch_init: human/aaoutkine/dark-knight - agent: dummy - branch_init: human/atl15/centerrr + branch_init: human/mountain/neuralbot1-1h - agent: dummy - branch_init: human/clay/diag-lattice + branch_init: human/sivecano/clouded-mind - agent: dummy - branch_init: human/devchris/black_magic + branch_init: human/mountain/neuralbot2-6h - agent: dummy - branch_init: human/devchris/first_test + branch_init: human/kalkin/artemis - agent: dummy - branch_init: human/edward/flail + branch_init: human/kalkin/artemis2 - agent: dummy - branch_init: human/entropicdrifter/gigachad + branch_init: human/navster8/maginot-line - agent: dummy - branch_init: human/entropicdrifter/glommer + branch_init: human/jiricodes/jiricodes-bot - agent: dummy - branch_init: human/entropicdrifter/glommerv2 + branch_init: human/sbasu3/meek-bot - agent: dummy - branch_init: human/entropicdrifter/seven-of-nine + branch_init: human/essickmango/fruity-test - agent: dummy - branch_init: human/entropicdrifter/we-are-borg + branch_init: human/tabaxi3k/charles - agent: dummy - branch_init: human/essickmango/fruity-test + branch_init: human/devchris/first_test - agent: dummy - branch_init: human/essickmango/pickle-up + branch_init: human/aaa/jippty5 - agent: dummy - branch_init: human/gerenuk/gere-ape + branch_init: human/jay0jayjay/naivestarter - agent: dummy - branch_init: human/happysquid/test + branch_init: human/luisa/luisasrobot - agent: dummy - branch_init: human/jammyliu/sixty-nine-line + branch_init: human/luisa/baselinegere - agent: dummy - branch_init: human/jay0jayjay/naivestarter + branch_init: human/anton/anton4000 - agent: dummy - branch_init: human/jiricodes/jiricodes-bot + branch_init: human/aayyad/testbot - agent: dummy - branch_init: human/kalkin/artemis + branch_init: human/edward/flail - agent: dummy - branch_init: human/kalkin/artemis2 + branch_init: human/mousetail/genetic-robot - agent: dummy branch_init: human/kalkin/maxad - agent: dummy - branch_init: human/ketza/arthur + branch_init: human/mjburgess/rule99 - agent: dummy branch_init: human/ketza/bob - agent: dummy - branch_init: human/lanity/sivuy + branch_init: human/suddenlyseals/control-center - agent: dummy - branch_init: human/ldang/nemo + branch_init: human/aaoutkine/school-bot - agent: dummy - branch_init: human/ldang/nessy + branch_init: human/thesmilingturtl/naivefaa - agent: dummy - branch_init: human/luisa/baselinegere + branch_init: human/mario31313/alpha_13 - agent: dummy - branch_init: human/luisa/luisasrobot + branch_init: human/underscore/bot1 - agent: dummy - branch_init: human/mario31313/alpha_13 + branch_init: human/lanity/sivuy - agent: dummy branch_init: human/mee42/follow-bot - agent: dummy - branch_init: human/mitch84/crw_preempt + branch_init: human/anton/om-om - agent: dummy - branch_init: human/mitch84/retreat_walk2 + branch_init: human/aaoutkine/silo34 - agent: dummy - branch_init: human/mitch84/walk_retreat + branch_init: human/mountain/neuralbot4-3h - agent: dummy - branch_init: human/mjburgess/rule99 + branch_init: human/ketza/arthur - agent: dummy branch_init: human/mkap/test - agent: dummy - branch_init: human/mountain/neuralbot1-1h + branch_init: human/essickmango/pickle-up - agent: dummy - branch_init: human/mountain/neuralbot2-6h + branch_init: human/wolfsleuth/simple - agent: dummy - branch_init: human/mountain/neuralbot4-3h + branch_init: human/gerenuk/gere-ape - agent: dummy - branch_init: human/mousetail/coward-bot + branch_init: human/clay/diag-lattice - agent: dummy - branch_init: human/mousetail/genetic-robot + branch_init: human/atl15/centerrr - agent: dummy - branch_init: human/navster8/bash-brothers + branch_init: human/jammyliu/sixty-nine-line - agent: dummy - branch_init: human/navster8/maginot-line + branch_init: human/mitch84/walk_retreat - agent: dummy - branch_init: human/sbasu3/meek-bot + branch_init: human/tabaxi3k/black-magic-1 - agent: dummy - branch_init: human/sivecano/clouded-mind + branch_init: human/devchris/black_magic - agent: dummy - branch_init: human/suddenlyseals/control-center + branch_init: human/mitch84/retreat_walk2 - agent: dummy - branch_init: human/tabaxi3k/black-magic-1 + branch_init: human/mitch84/crw_preempt - agent: dummy - branch_init: human/tabaxi3k/charles + branch_init: human/entropicdrifter/glommer - agent: dummy - branch_init: human/thesmilingturtl/naivefaa + branch_init: human/mousetail/coward-bot - agent: dummy - branch_init: human/underscore/bot1 + branch_init: human/entropicdrifter/glommerv2 - agent: dummy - branch_init: human/wolfsleuth/simple -prompts: - game_description: |- - RobotRumble ladder + branch_init: human/entropicdrifter/we-are-borg +- agent: dummy + branch_init: human/entropicdrifter/seven-of-nine +- agent: dummy + branch_init: human/entropicdrifter/gigachad diff --git a/scripts/run_ladder.py b/scripts/run_ladder.py index e64513bf..13001bc3 100644 --- a/scripts/run_ladder.py +++ b/scripts/run_ladder.py @@ -1,4 +1,6 @@ import argparse +import getpass +import time from pathlib import Path import yaml @@ -11,38 +13,114 @@ def main( config_path: Path, + *, + cleanup: bool = False, + output_dir: Path | None = None, + suffix: str = "", + keep_containers: bool = False, ): yaml_content = config_path.read_text() preprocessed_yaml = resolve_includes(yaml_content, base_dir=CONFIG_DIR) config = yaml.safe_load(preprocessed_yaml) + ladder, player, rounds, sims = ( + config["ladder"], + config["player"], + config["tournament"]["rounds"], + config["game"]["sims_per_round"], + ) + timestamp = time.strftime("%y%m%d%H%M%S") + del config["player"] + del config["ladder"] + ladder_folder = f"LadderTournament.{config['game']['name']}.r{rounds}.s{sims}.{timestamp}" + player["branch"] = ladder_folder + parent_dir = LOCAL_LOG_DIR / getpass.getuser() / ladder_folder + + for idx, opponent in enumerate(ladder): + opponent_rank = len(ladder) - idx + opponent["name"] = opponent["branch_init"].replace("human/", "").replace("/", "_") + if "branch_init" in player and idx > 0: + # After first opponent, remove branch_init so that player continues from previous tournament's codebase + del player["branch_init"] + c = { + **config, + "players": [ + player, + opponent, + ], + } + + players = [p["name"] for p in c["players"]] + p_num = len(players) + p_list = ".".join(players) + suffix_part = f".{suffix}" if suffix else "" + folder_name = f"PvpTournament.{c['game']['name']}.r{rounds}.s{sims}.p{p_num}.{p_list}{suffix_part}" + + tournament_dir = parent_dir / folder_name if output_dir is None else output_dir / folder_name + tournament = PvpTournament( + c, + output_dir=tournament_dir, + cleanup=cleanup, + keep_containers=keep_containers, + ) + tournament.run() + + # Get results + metadata_path = tournament_dir / "metadata.json" + with open(metadata_path) as f: + metadata = yaml.safe_load(f) + round_winners = [r["winner"] for r in metadata["round_stats"].values()] - players = config["players"] - num_players = len(players) - for i in range(num_players): - for j in range(i + 1, num_players): - player1 = players[i] - player1["name"] = player1["branch_init"] - player2 = players[j] - player2["name"] = player2["branch_init"] - pvp_config = { - **config, - "players": [player1, player2], - } - vs = f"PvpTournament.{player1['name']}_vs_{player2['name']}".replace("/", "_") - output_dir = LOCAL_LOG_DIR / "ladder" / config["game"]["name"] / vs - try: - tournament = PvpTournament(pvp_config, output_dir=output_dir) - except FileExistsError: - continue - tournament.run() + # Player must have won majority of rounds and the last round to continue ladder + player_wins = sum(1 for w in round_winners if w == player["name"]) + player_won_last = round_winners[-1] == player["name"] + + if not player_wins > len(round_winners) // 2 or not player_won_last: + # If player lost tournament, ladder challenge ends + break + + print("=" * 10) + print( + f"{player['name']} successfully beat {opponent['name']} (rank {opponent_rank}/{len(ladder)}) " + f"in {player_wins}/{len(round_winners)} rounds.\n" + "Ladder challenge continuing" + ) + print("=" * 10) + + print(f"Ladder tournament complete. Logs saved to {parent_dir}") + print(f"Final opponent faced: {opponent['name']} (rank {opponent_rank}/{len(ladder)} in ladder)") def main_cli(argv: list[str] | None = None): - parser = argparse.ArgumentParser(description="CodeClash Ladder Runner") + parser = argparse.ArgumentParser(description="CodeClash") parser.add_argument( "config_path", type=Path, - help="Path to the ladder configuration YAML file.", + help="Path to the config file.", + ) + parser.add_argument( + "-c", + "--cleanup", + action="store_true", + help="If set, do not clean up the game environment after running.", + ) + parser.add_argument( + "-o", + "--output-dir", + type=Path, + help="Sets the output directory (default is 'logs' with current user subdirectory).", + ) + parser.add_argument( + "-s", + "--suffix", + type=str, + help="Suffix to attach to the folder name. Does not include leading dot or underscore.", + default="", + ) + parser.add_argument( + "-k", + "--keep-containers", + action="store_true", + help="Do not remove containers after games/agent finish", ) args = parser.parse_args(argv) main(**vars(args))