Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
42c71d9
creates a multiagent model
liamlaverty May 11, 2023
fbada71
begins to add an array'd environment
liamlaverty May 11, 2023
cf85b43
updates random sample `run` to use env arrays
liamlaverty May 11, 2023
5333a10
agents pass actions and rewards back and forwards correctly
liamlaverty May 12, 2023
49c495b
updates the config to run learn algo
liamlaverty May 12, 2023
7918e20
creates a multidiscrete version
liamlaverty May 12, 2023
5ee2d82
creates a multidiscrete version of the application
liamlaverty May 12, 2023
7a7b7f0
adjusts action and observation spaces to pass env validations
liamlaverty May 13, 2023
18aba5e
configures the model to run training
liamlaverty May 13, 2023
349e857
model learns behaviour
liamlaverty May 13, 2023
f16dcf0
refactors some var names
liamlaverty May 13, 2023
edd425d
WIP, setting up consumers
liamlaverty May 13, 2023
5e56200
adds some vending based on price points
liamlaverty May 14, 2023
01b4c0a
corrects a bug where vendors were setting negative prices
liamlaverty May 14, 2023
82c6d4e
corrects a bug where vendors were giving away products for free
liamlaverty May 14, 2023
c6dd563
penalises the agents for charging less than the wholesale price
liamlaverty May 14, 2023
1d885a3
improves performance a little
liamlaverty May 14, 2023
8676b98
reduces the problem space to ints instead of floats
liamlaverty May 14, 2023
0cb69ed
config changes
liamlaverty May 14, 2023
50bdade
oligopoly config
liamlaverty May 14, 2023
a597fcd
cleans up some code
liamlaverty May 15, 2023
269c413
massively reduces the problem-space
liamlaverty May 15, 2023
52f68f2
starts logging custom vars to tensorboards
liamlaverty May 15, 2023
dc57b6e
WIP adding better logging
liamlaverty May 15, 2023
5d3790c
makes the logging clearer, and logs for each agent
liamlaverty May 15, 2023
a774845
logging debugging
liamlaverty May 15, 2023
ac56de9
clears the agent individual results between steps
liamlaverty May 15, 2023
ca250d8
updates logging paths
liamlaverty May 15, 2023
c14033c
Merge pull request #5 from liamlaverty/MAC-04-low-resolution-action-s…
liamlaverty May 15, 2023
b55c8b2
distributes sales across agents
liamlaverty May 15, 2023
4dfc6ff
configures for monop
liamlaverty May 15, 2023
6a6dc10
sets up the model to loop through each market type
liamlaverty May 16, 2023
310a5b9
adjusts the scope of possible price points
liamlaverty May 16, 2023
0cb582b
updates the agent action space to 15 instead of 10 px options
liamlaverty May 16, 2023
87eaa64
corrects a bug where too many products were being sold per loop
liamlaverty May 16, 2023
a2fcc0b
bug corrections
liamlaverty May 17, 2023
7a28d72
adds a new tracking param
liamlaverty May 17, 2023
76249b5
Update .gitignore
liamlaverty May 17, 2023
1e13d41
Update multiagent_main.py
liamlaverty May 17, 2023
ccdc84c
Merge branch 'MAC-03-multiagent-model' of https://github.com/liamlave…
liamlaverty May 17, 2023
af75efc
removes some NaN values from logging, adds MC pressure
liamlaverty May 17, 2023
964ea86
models for bertrand comp
liamlaverty May 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Macodiac.ML/.gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
training/results/logs/*
training_multiagent/results/logs/*
!training_multiagent/results/logs/.keep
!training/results/logs/.keep
training/results/saved_models/*
!training/results/saved_models/.keep
training_multiagent/results/saved_models/*
!training_multiagent/results/saved_models/.keep
training_multiagent_16may/results/logs/*
training_multiagent_16may/results/saved_models/*
training_multiagent/results_price_setter/logs/*
training_multiagent/results_price_setter/saved_models/*


# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
1 change: 0 additions & 1 deletion Macodiac.ML/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def step(self, action):
self.state += action - 1
self.environment_timesteps -=1


if self.state > 0:
reward = 1
elif self.state == 0:
Expand Down
5 changes: 3 additions & 2 deletions Macodiac.ML/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import gymnasium as gym
from gymnasium import Env
from environment import MacodiacEnvironment
from multiagentenvironment import MultiAgentMacodiacEnvironment
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np

Expand All @@ -22,7 +23,7 @@ def __init__(self):
self.save_path = os.path.join(filePath,'saved_models', 'model')
self.save_path_intermittent = os.path.join(filePath,'saved_models', 'intermittent_saved_models')
self.env = MacodiacEnvironment(envTimesteps=100)
self.numTrainingIterations = 10_000_000
self.numTrainingIterations = 10_000
self.numEpisodes = 10


Expand All @@ -31,7 +32,7 @@ def __init__(self):
# NOTES:
# if loadmodel is set to false, and trainmodel is set to true,
# the currently saved model is overwritten
self.__MODE_LOADMODEL__ = True
self.__MODE_LOADMODEL__ =False

# set to true if you want to train and then save the model
self.__MODE_TRAINMODEL__ = True
Expand Down
207 changes: 207 additions & 0 deletions Macodiac.ML/md_multiagent_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import os
import gymnasium as gym
from gymnasium import Env
from environment import MacodiacEnvironment
from md_multiagentenvironment import MdMultiAgentMacodiacEnvironment
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
import numpy as np
import random



from stable_baselines3 import PPO


class MultiagentMain():
isRunning = False

def __init__(self):
"""
init the class
"""
filePath = os.path.join('Macodiac.ML', 'training_multiagent','results')
self.log_path = os.path.join(filePath,'Logs')
self.save_path = os.path.join(filePath,'saved_models', 'model')
self.save_path_intermittent = os.path.join(filePath,'saved_models', 'intermittent_saved_models')
self.numTrainingIterations = 10
self.numEpisodes = 10
self.numAgents = 3

self.env = MdMultiAgentMacodiacEnvironment(envTimesteps=15, numAgents=self.numAgents)
check_env(self.env)


# set to true if you want to load an existing model
# model loading happens first, then training
# NOTES:
# if loadmodel is set to false, and trainmodel is set to true,
# the currently saved model is overwritten
self.__MODE_LOADMODEL__ = False

# set to true if you want to train and then save the model
self.__MODE_TRAINMODEL__ = True

# set to true to use the randomsample mode for testing,
# rather than the model version
self.__MODE_RANDOMSAMPLE__ = False


def Run(self):
"""
Runs the project
"""

if self.__MODE_RANDOMSAMPLE__:
self.run_multiagent_project_with_rand_test(self.env, 5)

model = self.create_model(self.env, self.log_path)

if self.__MODE_LOADMODEL__:
model = self.load_model(self.env, model, self.save_path)

if self.__MODE_TRAINMODEL__:
model = self.train_model(model,
self.numTrainingIterations, self.save_path_intermittent)
self.save_model(model, self.save_path)


else:
self.run_project(self.env, self.numEpisodes, model)
self.policy_evaluation(model, self.env, self.numEpisodes)


def run_multiagent_project_with_rand_test(self, env:MdMultiAgentMacodiacEnvironment, numEpisodes: int):
"""
Runs the project with random sampling, using the multiagent env
"""

for episode in range(numEpisodes):
obs = env.reset()
done = False
score = 0
agent_scores = []
iterator = 0
while not done:
#env.render()
iterator+=1
print(f'iterator:{iterator}')
action_arr = []
for i in range(len(env.policy_agents)):
action_arr.append(random.randint(0,2))
# agentActionSpace = env.action_space[i]
# actionForAgent = agentActionSpace.sample()
# action_arr.append(actionForAgent)

print(f'action for agents:\t{action_arr}')

obs_arr, reward_arr, done_arr, isTerminal, info_arr = env.step(action_arr)

# for i, reward in enumerate(reward_arr):
# print(f'reward is {reward}')

agent_scores.append(sum(reward_arr))

print(f'rewards for agents:\t{reward_arr}')

if any(done_arr):
isTerminal = True

done = isTerminal
print(f'Episode:{episode} | Aggregate agent scores:(Sum:{sum(agent_scores)})')
env.close()



def run_project(self, env:MdMultiAgentMacodiacEnvironment, numEpisodes: int, model):
"""
Runs the project with an actual model, instead of random sampling
of a model

@param env: The environment to run this project with
@param numEpisodes: the count of episodes to run the environment for
"""
scores = []
for episode in range(numEpisodes):
obs = env.reset()
done = False
score = 0
while not done:
#env.render()
action, _discard = model.predict(obs)
obs, reward, isTerminal, info = env.step(action)
score += reward
done = isTerminal
scores.append(score)

runningAvg = np.mean(scores)

print(f'Episode:{episode} \t| Score:{score} \t\t| RunningAvg: {round(runningAvg, 2)}')
env.close()


def create_model(self, env: MdMultiAgentMacodiacEnvironment, log_path: str):
env.reset()
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
return model


def train_model(self, model, numTimesteps: int, savePath:str):
"""
Trains a model with the number of iterations in
numtimesteps. Creates a n intermediate save every 1m iterations

@param model: The model to train. The model must have been instantiated
@param numTimesteps: the number of training iterations
"""

saveEveryNSteps = 1_000_000

if numTimesteps < saveEveryNSteps:
model.learn(total_timesteps=numTimesteps)

else:
rangeUpper = int(numTimesteps / saveEveryNSteps)
for i in range(1,rangeUpper+1):
model.learn(total_timesteps=saveEveryNSteps)
model.save(os.path.join(savePath, f'interim-{i}'))

return model

def policy_evaluation(self, model, env: MdMultiAgentMacodiacEnvironment, numEpisodes:int=50):
"""
Prints a policy evaluation, including the mean episode reward
and the standard deviation

@param model: The model to be evaluated
@param env: The environment to evaluate the model against
@param numEpisodes: The count of episodes to evaluate against
"""
print('\nevalResult:(mean episode reward, standard deviation)')
print(f'evalResult:{evaluate_policy(model, env, n_eval_episodes=numEpisodes)}\n')



def save_model(self, model, modelPath):
"""
Saves a model to a given path

@param model: The model to save
@param modelPath: The path to save to
"""
model.save(modelPath)


def load_model(self, env: MdMultiAgentMacodiacEnvironment, model, modelPath: str):
"""
Saves a model to a given path

@param model: The model to save
@param modelPath: The modelPath to save to
"""
model = PPO.load(modelPath, env=env)
return model


# Script entry point: guard so that importing this module does not
# immediately build the environment and start a training run.
if __name__ == "__main__":
    main = MultiagentMain()
    main.Run()
Loading