diff --git a/Macodiac.ML/.gitignore b/Macodiac.ML/.gitignore
index 197028e..32fa32c 100644
--- a/Macodiac.ML/.gitignore
+++ b/Macodiac.ML/.gitignore
@@ -1,7 +1,16 @@
 training/results/logs/*
+training_multiagent/results/logs/*
+!training_multiagent/results/logs/.keep
 !training/results/logs/.keep
 training/results/saved_models/*
 !training/results/saved_models/.keep
+training_multiagent/results/saved_models/*
+!training_multiagent/results/saved_models/.keep
+training_multiagent_16may/results/logs/*
+training_multiagent_16may/results/saved_models/*
+training_multiagent/results_price_setter/logs/*
+training_multiagent/results_price_setter/saved_models/*
+
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/Macodiac.ML/environment.py b/Macodiac.ML/environment.py
index fa1d065..498513f 100644
--- a/Macodiac.ML/environment.py
+++ b/Macodiac.ML/environment.py
@@ -41,7 +41,6 @@ def step(self, action):
         self.state += action - 1
         self.environment_timesteps -=1
 
-
         if self.state > 0:
             reward = 1
         elif self.state == 0:
diff --git a/Macodiac.ML/main.py b/Macodiac.ML/main.py
index 829146b..1619c2f 100644
--- a/Macodiac.ML/main.py
+++ b/Macodiac.ML/main.py
@@ -2,6 +2,7 @@
 import gymnasium as gym
 from gymnasium import Env
 from environment import MacodiacEnvironment
+from multiagentenvironment import MultiAgentMacodiacEnvironment
 from stable_baselines3.common.evaluation import evaluate_policy
 import numpy as np
 
@@ -22,7 +23,7 @@ def __init__(self):
         self.save_path =  os.path.join(filePath,'saved_models', 'model')
         self.save_path_intermittent =  os.path.join(filePath,'saved_models', 'intermittent_saved_models')
         self.env = MacodiacEnvironment(envTimesteps=100)
-        self.numTrainingIterations = 10_000_000
+        self.numTrainingIterations = 10_000
         self.numEpisodes = 10
 
 
@@ -31,7 +32,7 @@ def __init__(self):
         # NOTES: 
         #   if loadmodel is set to false, and trainmodel is set to true, 
         #   the currently saved model is overwritten
-        self.__MODE_LOADMODEL__ = True
+        self.__MODE_LOADMODEL__ =False
 
         # set to true if you want to train and then save the model
         self.__MODE_TRAINMODEL__ = True
diff --git a/Macodiac.ML/md_multiagent_main.py b/Macodiac.ML/md_multiagent_main.py
new file mode 100644
index 0000000..9c307c9
--- /dev/null
+++ b/Macodiac.ML/md_multiagent_main.py
@@ -0,0 +1,207 @@
+import os
+import gymnasium as gym
+from gymnasium import Env
+from environment import MacodiacEnvironment
+from md_multiagentenvironment import MdMultiAgentMacodiacEnvironment
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.env_checker import check_env
+import numpy as np
+import random
+
+
+
+from stable_baselines3 import PPO
+
+
+class MultiagentMain():
+    isRunning = False
+
+    def __init__(self):
+        """
+        Configures the mode flags, paths, run sizes and the environment
+        """
+        # Mode flags. Loading happens before training; training with
+        # loading switched off overwrites the currently saved model.
+        self.__MODE_LOADMODEL__ = False
+
+        # when true the model is trained and then saved
+        self.__MODE_TRAINMODEL__ = True
+
+        # when true the random-sample test is used for testing,
+        # rather than the model version
+        self.__MODE_RANDOMSAMPLE__ = False
+
+        # every artefact lives beneath the multiagent results folder
+        resultsDir = os.path.join('Macodiac.ML', 'training_multiagent', 'results')
+        modelsDir = os.path.join(resultsDir, 'saved_models')
+        self.log_path = os.path.join(resultsDir, 'Logs')
+        self.save_path = os.path.join(modelsDir, 'model')
+        self.save_path_intermittent = os.path.join(modelsDir, 'intermittent_saved_models')
+
+        self.numTrainingIterations = 10
+        self.numEpisodes = 10
+        self.numAgents = 3
+
+        # build the environment and have check_env validate it
+        self.env = MdMultiAgentMacodiacEnvironment(envTimesteps=15, numAgents=self.numAgents)
+        check_env(self.env)
+
+
+    def Run(self):
+        """
+        Entry point: drives the configured modes in order
+
+        The optional random-sample test comes first, then the model is
+        created and optionally loaded. After that the model is either
+        trained and saved, or (with training switched off) played and
+        evaluated.
+        """
+        if self.__MODE_RANDOMSAMPLE__:
+            self.run_multiagent_project_with_rand_test(self.env, 5)
+
+        model = self.create_model(self.env, self.log_path)
+        if self.__MODE_LOADMODEL__:
+            model = self.load_model(self.env, model, self.save_path)
+
+        if not self.__MODE_TRAINMODEL__:
+            self.run_project(self.env, self.numEpisodes, model)
+            self.policy_evaluation(model, self.env, self.numEpisodes)
+            return
+        model = self.train_model(model, self.numTrainingIterations, self.save_path_intermittent)
+        self.save_model(model, self.save_path)
+
+
+    def run_multiagent_project_with_rand_test(self, env:MdMultiAgentMacodiacEnvironment, numEpisodes: int):
+        """
+        Plays numEpisodes episodes of the multiagent env, giving every
+        agent a random action of 0, 1 or 2 on each step
+
+        The summed reward of each step is collected, and the total for
+        the episode is printed when it ends. The environment is closed
+        once every episode has been played.
+
+        @param env: The multiagent environment to play
+        @param numEpisodes: the count of episodes to play
+        """
+        for episode in range(numEpisodes):
+            env.reset()
+            stepTotals = []
+            stepCount = 0
+            finished = False
+
+            # keep stepping until the episode is over
+            while not finished:
+                stepCount += 1
+                print(f'iterator:{stepCount}')
+
+                # one random action per policy agent
+                action_arr = [random.randint(0,2) for _ in env.policy_agents]
+                print(f'action for agents:\t{action_arr}')
+
+                # step() hands back per-agent lists plus one terminal flag
+                _obs, reward_arr, done_arr, isTerminal, _info = env.step(action_arr)
+
+                # keep the combined reward of all agents for this step
+                stepTotals.append(sum(reward_arr))
+                print(f'rewards for agents:\t{reward_arr}')
+
+                # stop when the env says so or as soon as any agent is done
+                finished = isTerminal or any(done_arr)
+
+            print(f'Episode:{episode} | Aggregate agent scores:(Sum:{sum(stepTotals)})')
+        env.close()
+
+
+
+    def run_project(self, env:MdMultiAgentMacodiacEnvironment, numEpisodes: int, model):
+        """
+        Runs the project with an actual model, instead of random sampling
+
+        The environment's step returns per-agent lists plus one terminal
+        flag (five values), so each step is unpacked accordingly and the
+        episode score is the sum of every agent's reward.
+
+        @param env: The environment to run this project with
+        @param numEpisodes: the count of episodes to run the environment for
+        @param model: the model whose predict() chooses the actions
+        """
+        scores = []
+        for episode in range(numEpisodes):
+            obs = env.reset()
+            done = False
+            score = 0
+            while not done:
+                action, _discard = model.predict(obs)
+                obs, reward_arr, done_arr, isTerminal, _info = env.step(action)
+                score += sum(reward_arr)
+                done = isTerminal or any(done_arr)
+            scores.append(score)
+            print(f'Episode:{episode} \t| Score:{score} \t\t| RunningAvg: {round(np.mean(scores), 2)}')
+        env.close()
+
+
+    def create_model(self, env: MdMultiAgentMacodiacEnvironment, log_path: str):
+        """Resets env, then returns a new PPO 'MlpPolicy' model logging to log_path"""
+        env.reset()
+        return PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
+
+
+    def train_model(self, model, numTimesteps: int, savePath:str):
+        """
+        Trains a model for numTimesteps iterations, writing an
+        intermediate save after every full 1m iterations
+
+        @param model: The model to train. The model must have been instantiated
+        @param numTimesteps: the number of training iterations
+        @param savePath: the folder that receives the 'interim-<n>' saves
+        @return: the trained model
+        """
+        saveEveryNSteps = 1_000_000
+        if numTimesteps < saveEveryNSteps:
+            model.learn(total_timesteps=numTimesteps)
+            return model
+        fullChunks, remainder = divmod(numTimesteps, saveEveryNSteps)
+        for i in range(1, fullChunks + 1):
+            model.learn(total_timesteps=saveEveryNSteps)
+            model.save(os.path.join(savePath, f'interim-{i}'))
+        if remainder:  # train the leftover steps instead of dropping them
+            model.learn(total_timesteps=remainder)
+        return model
+
+    def policy_evaluation(self, model, env: MdMultiAgentMacodiacEnvironment, numEpisodes:int=50):
+        """
+        Prints evaluate_policy's (mean episode reward, standard deviation)
+
+        @param model:       The model to be evaluated
+        @param env:         The environment to evaluate the model against
+        @param numEpisodes: The count of episodes to evaluate against
+        """
+        print('\nevalResult:(mean episode reward, standard deviation)')
+        evalResult = evaluate_policy(model, env, n_eval_episodes=numEpisodes)
+        print(f'evalResult:{evalResult}\n')
+
+
+
+    def save_model(self, model, modelPath):
+        """
+        Persists the model by calling its save method
+
+        @param model:       The model to save
+        @param modelPath:   The destination path handed to model.save
+        """
+        model.save(modelPath)
+    
+    
+    def load_model(self, env: MdMultiAgentMacodiacEnvironment, model, modelPath: str):
+        """
+        Loads a saved PPO model from a given path and returns it
+        @param env:         The environment passed on to PPO.load
+        @param model:       Not read; it is replaced by the loaded model
+        @param modelPath:   The path to load the model from
+        """
+        model = PPO.load(modelPath, env=env)
+        return model
+
+
+if __name__ == '__main__':  # run on direct execution only, never as an import side effect
+    MultiagentMain().Run()
\ No newline at end of file
diff --git a/Macodiac.ML/md_multiagentenvironment.py b/Macodiac.ML/md_multiagentenvironment.py
new file mode 100644
index 0000000..6c23e8a
--- /dev/null
+++ b/Macodiac.ML/md_multiagentenvironment.py
@@ -0,0 +1,166 @@
+import gymnasium as gym
+from gym import Env
+# use `gym.spaces` here, even though we're using `gymnasium`
+# https://stackoverflow.com/questions/75108957/assertionerror-the-algorithm-only-supports-class-gym-spaces-box-box-as-acti
+from gym import spaces
+import numpy as np
+import random
+
+class AgentObject:  # plain per-agent record used by the environment class in this file
+    def __init__(self):
+        self.state = []  # placeholder; the environment's reset() assigns the real state
+
+class MdMultiAgentMacodiacEnvironment(Env):
+    """
+    Builds a profit maximising agent environment, supporting
+    up to n_agent agents
+    """
+    state = 0
+    environment_timesteps = 15
+
+    def __init__(self, envTimesteps:int, numAgents: int):
+        """
+        Builds the agents and the action/observation spaces, resets
+        the environment and prints the resulting settings
+
+        Attributes set here:
+          policy_agents:     one AgentObject per agent
+          agents:            a single-item list holding numAgents
+          action_space:      MultiDiscrete with 3 choices per agent
+          observation_space: MultiDiscrete with 10 values per agent
+
+        @param envTimesteps: the timestep budget stored on the environment
+        @param numAgents: the count of policy agents to create
+        """
+        self.environment_timesteps = envTimesteps
+
+        # one AgentObject per agent taking part
+        self.policy_agents = [AgentObject() for _ in range(numAgents)]
+        # stored as a one-item list, not one entry per agent
+        self.agents = [numAgents]
+
+        # every agent contributes one dimension to each space:
+        # 3 possible actions and 10 possible observation values
+        agentCount = len(self.policy_agents)
+        actionDims = np.array([3] * agentCount)
+        observationDims = np.array([10] * agentCount)
+
+        self.action_space = spaces.MultiDiscrete(actionDims)
+        self.observation_space = spaces.MultiDiscrete(observationDims)
+
+        # NOTE: reset() assigns environment_timesteps itself, so the
+        # value taken from envTimesteps above is replaced at this point
+        self.reset()
+
+        # echo the configuration so that a run can be checked by eye
+        banner = '-- ENV SETTINGS --'
+        print(banner)
+        print(self.observation_space)
+        print(self.action_space)
+        print(self.environment_timesteps)
+        print(banner)
+
+
+    def set_agent_action(self, action, agent, actionSpace):  # actionSpace is accepted but not read
+        agent.state = action  # the chosen action is stored as the agent's state
+
+    def step_agent(self, agent):
+        """
+        Scores one agent: its state minus one earns a reward of 1 when
+        positive, 0 when zero and -1 when negative
+        Returns (agent.state, reward, False, {}); the state is not changed
+        """
+        shifted = agent.state - 1
+        if shifted == 0:
+            return agent.state, 0, False, {}
+        reward = 1 if shifted > 0 else -1
+        return agent.state, reward, False, {}
+
+
+    def step(self, action_arr):
+        """
+        Advances the environment by one timestep for all agents
+
+        Every agent first receives its action from action_arr, then
+        each agent is stepped, and finally the per-agent results are
+        gathered into parallel lists.
+
+        @param action_arr: one action per agent, in policy_agents order
+
+        Returns (obs_arr, reward_arr, done_arr, isTerminal, info_arr):
+          obs_arr, reward_arr, done_arr: one entry per agent
+          isTerminal: True once the timesteps run out or any agent is done
+          info_arr:   a leading {'n': []} entry followed by each agent's info
+        """
+        # one timestep of the budget is used up by every call
+        self.environment_timesteps -= 1
+        agents = self.policy_agents
+
+        # hand out every action before any agent is stepped;
+        # set_agent_action stores the action as the agent's state
+        for index, agent in enumerate(agents):
+            self.set_agent_action(action_arr[index], agent, self.action_space[index])
+
+        # step the agents and keep each result on the agent itself
+        for agent in agents:
+            agent.state, agent.reward, agent.done, agent.info = self.step_agent(agent)
+
+        # collect the per-agent values in agent order
+        obs_arr = [self._get_obs(agent) for agent in agents]
+        reward_arr = [self._get_reward(agent) for agent in agents]
+        done_arr = [self._get_done(agent) for agent in agents]
+        info_arr = [{'n': []}] + [self._get_info(agent) for agent in agents]
+
+        # the episode ends on the timestep budget or on the first done agent
+        outOfTime = self.environment_timesteps <= 0
+        isTerminal = outOfTime or any(done_arr)
+
+        return obs_arr, reward_arr, done_arr, isTerminal, info_arr
+
+
+    def _get_obs(self, agent):
+        """
+        Returns the given agent's state, which serves as its observation
+        """
+        return agent.state
+
+    def _get_reward(self, agent):
+        """
+        Returns the reward currently stored on the given agent
+        """
+        return agent.reward
+
+    def _get_done(self, agent):
+        """
+        Returns the done/terminal flag currently stored on the given agent
+        """
+        return agent.done
+
+    def _get_info(self, agent):
+        """
+        Returns the info object currently stored on the given agent
+        """
+        return agent.info
+
+
+    def render(self) -> None:
+        """
+        Intentionally does nothing: the environment is fully headless
+        """
+        pass
+
+    def reset(self) -> int:
+        """
+        Sets the application to its initial conditions
+
+        Each agent gets a random whole number 0-10 as a one-item float array
+        """
+        for i in range(len(self.policy_agents)):
+            self.policy_agents[i].state = np.array([0 + random.randint(0,10)]).astype(float)  # NOTE(review): randint includes 10; observation_space is MultiDiscrete of 10 — confirm range
+            self.policy_agents[i].reward = 0
+            self.policy_agents[i].info = {}
+            self.policy_agents[i].done = False
+
+        self.environment_timesteps = 10  # NOTE(review): fixed at 10, so the envTimesteps given to __init__ is not restored here
+        return self.environment_timesteps  # NOTE(review): callers keep this as `obs`, yet it is the timestep count — confirm intent
+        
\ No newline at end of file
diff --git a/Macodiac.ML/multiagent_main.py b/Macodiac.ML/multiagent_main.py
new file mode 100644
index 0000000..cfcd4d0
--- /dev/null
+++ b/Macodiac.ML/multiagent_main.py
@@ -0,0 +1,267 @@
+import os
+import gymnasium as gym
+from gymnasium import Env
+from multiagentenvironment import TensorboardPriceCallback
+from multiagentenvironment import MultiAgentMacodiacEnvironment
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.env_checker import check_env
+import numpy as np
+from stable_baselines3 import PPO
+
+
+class MultiagentMain():
+    isRunning = False
+
+    def __init__(self, mode):
+        """
+        init the class
+
+        @param mode: the market structure to simulate; one of 'MONOPOLY',
+                     'DUOPOLY', 'OLIGOPOLY' or 'PERFECT_COMP'
+        @raises ValueError: when mode is not one of the options above
+        """
+        # set to true if you want to load an existing model
+        # model loading happens first, then training
+        # NOTES:
+        #   if loadmodel is set to false, and trainmodel is set to true,
+        #   the currently saved model is overwritten
+        self.__MODE_LOADMODEL__ = False
+
+        # set to true if you want to train and then save the model
+        self.__MODE_TRAINMODEL__ = True
+
+        # set to true to use the randomsample mode for testing,
+        # rather than the model version
+        self.__MODE_RANDOMSAMPLE__ = False
+
+        self.mode = mode
+
+        filePath = os.path.join('Macodiac.ML', 'training_multiagent','results')
+        self.log_path =  os.path.join(filePath,'Logs')
+        self.save_path =  os.path.join(filePath,'saved_models', self.mode)
+        self.save_path_intermittent =  os.path.join(filePath,'saved_models', 'intermittent_saved_models')
+
+        self.numEpisodes = 20
+        self.envTimesteps = 25
+
+        # mode -> (numAgents, numTrainingIterations)
+        modeSettings = {
+            'MONOPOLY': (1, 2_000_000),
+            'DUOPOLY': (2, 3_000_000),
+            'OLIGOPOLY': (5, 5_000_000),
+            'PERFECT_COMP': (10, 15_000_000),
+        }
+        if self.mode not in modeSettings:
+            # list the options from the table itself: the instance has no
+            # __MODE_OPTIONS__ attribute, so reading it raised AttributeError
+            raise ValueError(f'self.mode [{self.mode}] was not in mode options list [{list(modeSettings)}]')
+        self.numAgents, self.numTrainingIterations = modeSettings[self.mode]
+
+        if self.numAgents == 0 or self.numTrainingIterations == 0:
+            raise ValueError('both numAgents and numTrainingItterations must be above 0')
+
+        self.env = MultiAgentMacodiacEnvironment(envTimesteps=self.envTimesteps, numAgents=self.numAgents)
+        check_env(self.env)
+        
+
+    def Run(self):
+        """
+        Runs the project
+
+        Order: optional random-sample test, model creation, optional
+        load, optional train + save, then playing and evaluating the
+        model unless random sampling was requested.
+        """
+        if self.__MODE_RANDOMSAMPLE__:
+            self.run_multiagent_project_with_rand_test(self.env, self.numEpisodes)
+        model = self.create_model(self.env, self.log_path)
+        if self.__MODE_LOADMODEL__:
+            model = self.load_model(self.env, model, self.save_path)
+
+        if self.__MODE_TRAINMODEL__:
+            model = self.train_model(model, self.numTrainingIterations,
+                                     self.save_path_intermittent, self.mode)
+            self.save_model(model, self.save_path)
+
+        if self.__MODE_RANDOMSAMPLE__:
+            return
+        self.run_project(self.env, self.numEpisodes, model)
+        self.policy_evaluation(model, self.env, self.numEpisodes)
+
+
+    def run_multiagent_project_with_rand_test(self, env:MultiAgentMacodiacEnvironment, numEpisodes: int):
+        """
+        Runs the multiagent env without a model: every agent submits the
+        fixed action 11 on each step, and the resulting sales are printed
+
+        After each step the agents' sales are totalled from info['n'] and
+        compared with env.peek_env_consumer_money(). The whole test stops
+        early (after printing a message) if the sales exceed that amount.
+        Each episode's summed and mean rewards are printed at its end.
+
+        @param env: The multiagent environment to run
+        @param numEpisodes: the count of episodes to run the environment for
+        """
+        for episode in range(numEpisodes):
+            env.reset()
+            agent_scores = []
+            done = False
+
+            # play until the environment reports the episode as done
+            while not done:
+                # the same fixed action for every agent
+                action_arr = [11] * self.numAgents
+                print(f'action for agents:\t{action_arr}')
+
+                _obs_arr, reward, isDone, info = env.step(action_arr)
+                agent_scores.append(reward)
+
+                info_arr = info['n']
+                print(f'px for agents:\t{info_arr}')
+
+                # total the units sold and the money paid for them this step
+                quantitySold = 0
+                moneySales = 0
+                for agentInfo in info_arr:
+                    moneySales += agentInfo['sold'] * agentInfo['price']
+                    quantitySold += agentInfo['sold']
+
+                print(f'a_vending/quantity_sold_count: {quantitySold} at cost [{moneySales}]/[{env.peek_env_consumer_money()}. Consumer money per turn:{env.peek_env_consumer_money_each()}]')
+                # sales above the peeked consumer money abort the whole test
+                if moneySales > env.peek_env_consumer_money():
+                    print(f'Money sales of [{moneySales}]/[{env.peek_env_consumer_money()}] were too high. Consumer money per turn:{env.peek_env_consumer_money_each()}')
+                    return
+
+                # report the end of the episode from this step's flag; the
+                # earlier check read `done`, which is always False in the loop
+                if isDone:
+                    print('is done')
+                done = isDone
+
+            print(f'Episode:{episode} | \nAggregate agent scores:(Sum:{sum(agent_scores)})\n MeanAvg agent scores:({np.mean(agent_scores)})')
+
+        env.close()
+
+
+    def run_project_with_rand_test(self, env:MultiAgentMacodiacEnvironment, numEpisodes:int):
+        """
+        Plays the environment with actions sampled from its action
+        space, so no model is involved. Each episode's total reward
+        is printed.
+
+        @param env: The environment to run this project with
+        @param numEpisodes: the count of episodes to run the environment for
+        """
+        for episode in range(numEpisodes):
+            env.reset()
+            score = 0
+            isTerminal = False
+            while not isTerminal:
+                # draw a random action and apply it
+                sampled = env.action_space.sample()
+                _obs, reward, isTerminal, _info = env.step(sampled)
+                score += reward
+            print(f'Episode:{episode}  | Score:{score}')
+        env.close()
+
+
+    def run_project(self, env:MultiAgentMacodiacEnvironment, numEpisodes: int, model):
+        """
+        Plays the environment with actions predicted by the model and
+        prints each episode's score next to the running average
+
+        @param env: The environment to run this project with
+        @param numEpisodes: the count of episodes to run the environment for
+        @param model: the model whose predict() picks each action
+        """
+        episodeScores = []
+        for episode in range(numEpisodes):
+            obs = env.reset()
+            total = 0
+            finished = False
+            # keep predicting and stepping until the episode ends
+            while not finished:
+                action, _state = model.predict(obs)
+                obs, reward, finished, _info = env.step(action)
+                total += reward
+            episodeScores.append(total)
+
+            # mean over every episode played so far
+            runningAvg = round(np.mean(episodeScores), 2)
+            print(f'Episode:\t{episode} \t| Score:\t{total} \t\t| RunningAvg: {runningAvg}')
+        env.close()
+
+
+    def create_model(self, env: MultiAgentMacodiacEnvironment, log_path: str):
+        """Returns a new PPO 'MlpPolicy' model on device "cpu" that logs to log_path"""
+        return PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path, device="cpu")
+
+
+    def train_model(self, model, numTimesteps: int, savePath:str, saveName: str):
+        """
+        Trains a model for numTimesteps iterations, writing an
+        intermediate save after every full 1m iterations
+
+        @param model: The model to train. The model must have been instantiated
+        @param numTimesteps: the number of training iterations
+        @param savePath: the folder that receives the 'interim-<n>' saves
+        @param saveName: the tensorboard log name used for every learn call
+        """
+        saveEveryNSteps = 1_000_000
+        def learn(steps: int):
+            model.learn(total_timesteps=steps, callback=TensorboardPriceCallback(), tb_log_name=saveName)
+
+        if numTimesteps < saveEveryNSteps:
+            learn(numTimesteps)
+            return model
+
+        fullChunks, remainder = divmod(numTimesteps, saveEveryNSteps)
+        for i in range(1, fullChunks + 1):
+            learn(saveEveryNSteps)
+            model.save(os.path.join(savePath, f'interim-{i}'))
+        if remainder:  # train the leftover steps instead of dropping them
+            learn(remainder)
+        return model
+
+    def policy_evaluation(self, model, env: MultiAgentMacodiacEnvironment, numEpisodes:int=50):
+        """
+        Prints evaluate_policy's (mean episode reward, standard deviation)
+
+        @param model:       The model to be evaluated
+        @param env:         The environment to evaluate the model against
+        @param numEpisodes: The count of episodes to evaluate against
+        """
+        print('\nevalResult:(mean episode reward, standard deviation)')
+        evalResult = evaluate_policy(model, env, n_eval_episodes=numEpisodes)
+        print(f'evalResult:{evalResult}\n')
+
+
+
+    def save_model(self, model, modelPath):
+        """
+        Persists the model by calling its save method
+
+        @param model:       The model to save
+        @param modelPath:   The destination path handed to model.save
+        """
+        model.save(modelPath)
+    
+    
+    def load_model(self, env: MultiAgentMacodiacEnvironment, model, modelPath: str):
+        """
+        Loads a saved PPO model from a given path and returns it
+        @param env:         The environment passed on to PPO.load
+        @param model:       Not read; it is replaced by the loaded model
+        @param modelPath:   The path to load the model from
+        """
+        model = PPO.load(modelPath, env=env)
+        return model
+
+
+
+
+
+__MODE_OPTIONS__ = ['MONOPOLY', 'DUOPOLY', 'OLIGOPOLY', 'PERFECT_COMP']
+if __name__ == '__main__':  # train every mode on direct execution only, never on import
+    for mode in __MODE_OPTIONS__:
+        MultiagentMain(mode).Run()
\ No newline at end of file
diff --git a/Macodiac.ML/multiagentenvironment.py b/Macodiac.ML/multiagentenvironment.py
new file mode 100644
index 0000000..ce0742d
--- /dev/null
+++ b/Macodiac.ML/multiagentenvironment.py
@@ -0,0 +1,444 @@
+import gymnasium as gym
+from gym import Env
+# use `gym.spaces` here, even though we're using `gymnasium`
+# https://stackoverflow.com/questions/75108957/assertionerror-the-algorithm-only-supports-class-gym-spaces-box-box-as-acti
+from gym import spaces
+import numpy as np
+import random
+from random import shuffle
+from stable_baselines3.common.callbacks import BaseCallback
+
+class AgentObject:
+    def __init__(self):
+        self.state = []  # overwritten by the environment's reset() and set_agent_action()
+        self.reset_values()
+    # Restores the per-step sales figures to their defaults; `state` is left untouched
+    def reset_values(self):
+        self.vendingPrice = 0           # price offered to consumers (set by set_agent_action)
+        self.reward = 0                 # profit accumulated from sales
+        self.quantitySold = 0           # number of units sold
+        self.vendCost = 5               # current per-unit vending cost; moves by 0.66 on each sale
+        self.totalVendingCost = 0       # running sum of vendCost over the sales made
+        self.vendCostTrend = 'down'     # 'down' until vendCost drops below 1, then 'up'
+
+class ConsumerObject:
+    def __init__(self):
+        self.demand = 1             # not read anywhere in this module
+        self.utility = 0            # not read anywhere in this module
+        self.money = 0              # spending budget; refilled by clear_consumer_stats() every step
+        self.total_consumed = 0     # units bought so far; never reset in this module
+
+
+class TensorboardPriceCallback(BaseCallback):
+    """
+    custom logger callback recording each agent's price, sales, costs and reward, plus market-wide aggregates, on every step
+    """
+    runningAvgMeanPxOffered = 0         # not used by this class
+    runningAvgAcceptedVendedPx = 0      # not used by this class
+
+    def __init__(self, verbose=0):
+        self.reset()
+        super().__init__(verbose)
+
+    def reset(self):
+        self.iterator = 0
+
+    def _on_rollout_end(self) -> None:
+        self.reset()
+        return super()._on_rollout_end()
+    
+    def _on_step(self) -> bool:
+        self.reset()  # iterator is zeroed on every step, so it never counts anything
+        # self.iterator +=1
+        # agent_arr = self.training_env.get_attr('policy_agents')[0]
+        # Records this step's per-agent and aggregate vending stats; always returns True
+        ### info_arr is a list of per-agent dicts (also holding 'vendCost' and 'totalVendingCost'), like:
+        # [{'agent_num': 0, 'price': 10, 'sold': 0.0, 'reward': 0.0}, 
+        # {'agent_num': 1, 'price': 10, 'sold': 50.0, 'reward': 0.0}]
+        info_arr = self.locals['infos'][0]['n']
+        # NOTE(review): only element 0 of `infos` is read - presumably the single vectorised env; confirm
+
+        pxList = []
+        acceptedPxList = []
+        vendCostList = []
+        vendorsMadeSale = 0
+        quantitySold = 0
+        countNoSale = 0
+        countWiSale = 0
+        meanPxOffered = 0
+        agent_sales = 0
+        agent_vend_px = 0
+        agent_reward = 0
+        money_sales = 0
+        agent_sales_in_money = 0
+        agent_total_vend_cost = 0
+        agent_final_vend_cost = 0
+        meanPxAccepted = 0
+        meanVendCost = 0
+         
+        for i, agentInfo in enumerate(info_arr):
+            agent_sales = info_arr[i]['sold']
+            agent_vend_px = info_arr[i]['price']
+            agent_reward = info_arr[i]['reward']
+            agent_final_vend_cost = info_arr[i]['vendCost']
+            agent_total_vend_cost = info_arr[i]['totalVendingCost']
+            agent_sales_in_money = agent_sales * agent_vend_px
+            money_sales += agent_sales_in_money
+
+            pxList.append(agent_vend_px)
+            
+            if agent_sales > 0:
+                vendorsMadeSale += 1
+                quantitySold += agent_sales
+                countWiSale += 1
+                acceptedPxList.append(agent_vend_px)
+                vendCostList.append(agent_final_vend_cost)
+            else:
+                countNoSale += 1
+            
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/offered_px',   agent_vend_px)
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/sales_complete',  agent_sales)
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/sales_value',  agent_sales_in_money)
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/final_vend_cost',  agent_final_vend_cost)
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/total_vend_cost',  agent_total_vend_cost)
+            self.logger.record(f'a_vending_agent_{agentInfo["agent_num"]}/individual_reward',  agent_reward)
+            
+        if len(pxList) > 0:
+            meanPxOffered = np.mean(pxList)
+        if len(acceptedPxList) > 0:
+            meanPxAccepted = np.mean(acceptedPxList)
+        if len(vendCostList) > 0:
+            meanVendCost = np.mean(vendCostList)
+
+        # the means above stay 0 when their list is empty (e.g. no agent made a sale)
+        self.logger.record('a_vending/avgerage_offered_px_value', meanPxOffered)
+        self.logger.record('a_vending/average_accepted_px_value', meanPxAccepted)
+        self.logger.record('a_vending/average_final_vend_cost', meanVendCost)
+        self.logger.record('a_vending/quantity_sold_count', quantitySold)
+        self.logger.record('a_vending/total_value_sold', money_sales)
+        self.logger.record('a_vending/vendors_made_sale_count', vendorsMadeSale)
+        self.logger.record('a_vending/count_no_sale', countNoSale)
+        self.logger.record('a_vending/count_wi_sale', countWiSale)
+
+        return True
+
+class MultiAgentMacodiacEnvironment(Env):
+    """
+    Multi-agent vending market: each step every agent sets a price and
+    the consumers spend their per-step money with the cheapest agent(s)
+    """
+    state = 0                               # not read by any method of this class
+    environment_timesteps = 0               # steps left in the episode; set in reset()
+    environment_starter_timesteps = 150     # episode length; overwritten in __init__
+    env_wholesale_price = 8        # the price agents pay to purchase goods
+    env_agent_marginal_cost = 0     # the marginal cost of vending (only referenced in commented-out code)
+    num_consumers = 25
+    consumer_total_money_per_turn = 475     # split evenly across the consumers in reset()
+    consumers_arr = []                      # replaced by a per-instance list in __init__
+
+
+    def __init__(self, envTimesteps:int, numAgents: int):
+        """
+        Creates numAgents agents, the consumers and the spaces, then calls reset(); envTimesteps is the episode length in steps
+        """
+        self.environment_starter_timesteps = envTimesteps
+        self.policy_agents = []
+        self.consumers_arr = []
+        self.observation_space = []  # placeholder; replaced by the Box below
+
+        for i in range(numAgents):
+            self.policy_agents.append(AgentObject())
+        
+        for i in range(self.num_consumers):
+            self.consumers_arr.append(ConsumerObject())
+
+        
+        # one discrete action per agent, each in 0..14 (np.full gives [15, 15, ...] of length numAgents)
+        self.action_space = spaces.MultiDiscrete(np.full(numAgents, 15) )
+
+
+        # the observation space is a numAgents by 3 array of int32 numbers between 0 and 200
+        # (the wholesale price is not part of the observation)
+        # Observations space:
+        # 0: agent's state, after the action has been applied
+        # 1: agent's vending price in this round
+        # 2: agent's count of sold items
+        # (a 4th wholesale-price column is commented out in get_agent_default_observation_array)
+        self.observation_space = spaces.Box(low=0,high=200, shape=(numAgents, 3), dtype=np.int32)
+
+        print(f'obs_space.sample: {self.observation_space.sample()}')
+
+        self.reset()
+
+
+        
+        print('-- ENV SETTINGS --')
+        print(f'obs:{self.observation_space}')
+        print(f'sample:{self.observation_space.sample()}')
+        print(self.action_space)
+        print(self.action_space.sample())
+        print(self.environment_timesteps)
+        print('-- ENV SETTINGS --')
+
+    def peek_env_consumer_money(self):
+        return self.consumer_total_money_per_turn  # total money for all consumers per turn
+    def peek_env_consumer_money_each(self):
+        return self.consumerMoneyEach  # per-consumer share, computed in reset()
+    # Refills the consumer's money for the step; total_consumed is left untouched
+    def clear_consumer_stats(self, consumer):
+        consumer.money = self.consumerMoneyEach
+
+    def clear_agent_stats(self, agent):
+        agent.reset_values()
+        
+
+    # Stores the action on the agent and derives its vending price from it
+    def set_agent_action(self, action, agent):
+        # agent.state is the absolute markup added on top of the wholesale price
+        agent.state = action
+        agent.vendingPrice = self.env_wholesale_price + agent.state
+        # guard: with a zero price a consumer could never run out of money in alt_set_consumer_purchases
+        if agent.vendingPrice == 0:
+            print(f'error')
+            agent.vendingPrice = max(1, agent.vendingPrice)
+
+
+        # agentBaseVendingPriceAdjust = self.env_wholesale_price * (agent.state / 100)
+        # baseAgentVendingPrice = self.env_wholesale_price + agentBaseVendingPriceAdjust
+        # #agentMarginalCostAddedVendingPrice = agentBaseVendingPriceAdjust + self.env_agent_marginal_cost
+        # agent.vendingPrice = max(baseAgentVendingPrice, 1)
+        # print(f'agent vending price was {agent.vendingPrice}')
+    # Returns the agent's (state, reward, done=False, info={}) tuple; nothing is advanced here
+    def step_agent(self, agent):
+        reward = agent.reward
+        info = {}
+        return agent.state, reward, False, info
+
+
+    def step(self, action_arr):
+        """
+        Advances the market one timestep using one action per agent.
+
+        Resets and prices every agent, lets each consumer spend its money, then gathers
+        per-agent results. Terminal once the timesteps run out (agent done flags are always False)
+
+        Returns (int32 observation array with one 3-element row per agent, summed reward as a float,
+        isTerminal, info dict whose 'n' list holds one dict per agent)
+        """
+        self.environment_timesteps -=1
+
+        obs_arr = []  # filled below but not part of the return value
+        reward_arr = []
+        done_arr = []
+        info_arr = {'n': []}
+        
+        for i, agent in enumerate(self.policy_agents):
+            self.clear_agent_stats(agent)
+            self.set_agent_action(action_arr[i], agent)
+
+        for i, consumer in enumerate(self.consumers_arr):
+            self.clear_consumer_stats(consumer)
+            self.alt_set_consumer_purchases(self.policy_agents, consumer)
+
+        for i, agent in enumerate(self.policy_agents):
+            agent.state, agent.reward, agent.done, agent.info = self.step_agent(agent)
+            obs_arr.append(self._get_obs(agent))
+            reward_arr.append(self._get_reward(agent))
+            done_arr.append(self._get_done(agent))
+            info_arr['n'].append(self._get_info(agent, i))        
+
+        if self.environment_timesteps <= 0:
+            isTerminal = True
+        elif any(done_arr):
+             isTerminal = True
+        else:
+            isTerminal = False
+
+        tmpObsArray = []
+        for i, agent in enumerate(self.policy_agents):
+            partialObservationResult = self.get_agent_default_observation_array()
+            partialObservationResult[0] = self._get_obs(agent) #The agent's state is the 0th element of its result
+            partialObservationResult[1] = self._get_final_vend_price(agent) #The agent's vending price is the 1st element of its result
+            partialObservationResult[2] = self._get_quantity_sold(agent) #The agent's quantity sold is the 2nd element of its result
+            tmpObsArray.append(partialObservationResult)
+
+        concatObsArray = np.array(tmpObsArray).astype(np.int32) 
+        return concatObsArray, float(sum(reward_arr)), isTerminal,  info_arr
+
+    def alt_set_consumer_purchases(self, agents_arr, consumer):
+        """
+        So long as the consumer has money, loops through the agents, and selects the lowest
+        priced agent. 
+
+        if multiple agents share the same price points, distributes the sales across them all
+        """
+        lowestAbsolutePrice = 0
+        lowestPriceAgentIndexList = []
+        vendingPrices = []
+
+        for i, agent in enumerate(agents_arr):
+            vendingPrices.append(agent.vendingPrice)
+
+      
+        # print(f'prices are: {vendingPrices}')
+        lowestAbsolutePrice = min(vendingPrices)
+       
+        # gather all of the lowest price agents
+        for i, agent in enumerate(agents_arr):
+            if agent.vendingPrice == lowestAbsolutePrice:
+                lowestPriceAgentIndexList.append(i)
+
+        shuffle(lowestPriceAgentIndexList)  # randomises which tied agent is served first
+
+        # while the consumer still has money, purchase
+        # items from the vendors
+        while consumer.money > 0:
+            # loop through each vendor, purchase one item from them
+            for agentIndex in lowestPriceAgentIndexList:
+                if consumer.money > 0:
+                    agentToPurchaseFrom = agents_arr[agentIndex]
+                    
+                    if agentToPurchaseFrom.vendingPrice != lowestAbsolutePrice:
+                        raise ValueError(f'agent vending price [{agentToPurchaseFrom.vendingPrice}] is not the same as lowestAbsPrice:[{lowestAbsolutePrice}]')
+
+                    if consumer.money >= agentToPurchaseFrom.vendingPrice:
+                        if consumer.money < agentToPurchaseFrom.vendingPrice:  # cannot be true inside the >= branch
+                            raise ValueError(f'consumer with: [{consumer.money}] money attempted to purchase from agent charging: [{agentToPurchaseFrom.vendingPrice}]')
+                        consumer.money -= agentToPurchaseFrom.vendingPrice
+                        # print(f'consumer money: {consumer.money}')
+                        consumer.total_consumed += 1
+                        agentToPurchaseFrom.quantitySold += 1
+
+                        # Marginal cost trends down towards 1, then increases upwards
+                        if agentToPurchaseFrom.vendCostTrend == 'up':
+                            agentToPurchaseFrom.vendCost += 0.66
+                        elif agentToPurchaseFrom.vendCostTrend == 'down':
+                            agentToPurchaseFrom.vendCost -= 0.66
+                            if agentToPurchaseFrom.vendCost < 1:
+                                agentToPurchaseFrom.vendCostTrend = 'up'
+                        
+                        agentToPurchaseFrom.totalVendingCost += agentToPurchaseFrom.vendCost 
+                        agentToPurchaseFrom.reward += (agentToPurchaseFrom.vendingPrice - self.env_wholesale_price - agentToPurchaseFrom.vendCost)
+                    else:
+                        # print(f'consumer money was {consumer.money}, setting to 0')
+                        consumer.money = 0
+                        break  # leaves the for-loop; money is now 0 so the while-loop ends as well
+
+
+    def set_consumer_purchases(self, agents_arr, consumer):
+        """
+        So long as the consumer has money, loops through the agents, and selects the lowest
+        price agent. 
+
+        Purchases as many items from the agent as possible
+        """
+        # NOTE: not called by step(), which uses alt_set_consumer_purchases; consumer.money is not reduced here
+        lowestPriceAgnetIndex = 0
+        vendingPrices = []
+            
+        for i, agent in enumerate(agents_arr):
+            vendingPrices.append(agent.vendingPrice)
+            if agent.vendingPrice < agents_arr[lowestPriceAgnetIndex].vendingPrice:
+                lowestPriceAgnetIndex = i
+
+        lowestAgentVendPrice = agents_arr[lowestPriceAgnetIndex].vendingPrice
+
+        # instead of a while loop, compute the purchasable quantity directly
+        quantityPurchasable = np.floor(consumer.money / lowestAgentVendPrice)
+        consumerConsumed = quantityPurchasable
+        tmpAgentRewardPerUnitSold = (lowestAgentVendPrice - self.env_wholesale_price)
+        agentReward = tmpAgentRewardPerUnitSold * consumerConsumed
+
+        # consumer.money = 0
+        consumer.total_consumed += consumerConsumed
+        agents_arr[lowestPriceAgnetIndex].reward += agentReward
+        agents_arr[lowestPriceAgnetIndex].quantitySold += consumerConsumed
+        return lowestPriceAgnetIndex, lowestAgentVendPrice, vendingPrices
+
+
+    def _get_quantity_sold(self, agent):
+        """
+            accepts an agent, and returns the number of items it sold
+        """
+        return agent.quantitySold
+
+    def _get_final_vend_price(self, agent):
+        """
+            accepts an agent and returns its vending price
+        """
+        return agent.vendingPrice
+    
+    def _get_obs(self, agent):
+        """
+            accepts an Agent, and returns its observation/state
+        """
+        return agent.state
+
+    def _get_reward(self, agent):
+        """
+            accepts an Agent, and returns its reward
+        """
+        return agent.reward
+
+    def _get_done(self, agent):
+        """
+            accepts an Agent, and returns its done/terminal property
+        """
+        return agent.done
+
+    def _get_info(self, agent , i):
+        """
+            accepts an Agent and its index, and returns its info object
+        """
+        return {
+            "agent_num": i,
+            "price": agent.vendingPrice,
+            "sold": agent.quantitySold,
+            "reward": agent.reward,
+            "vendCost": agent.vendCost,
+            "totalVendingCost": agent.totalVendingCost
+            
+        }
+
+
+    def render(self) -> None:
+        """
+        Does nothing, the environment is fully headless
+        """
+        pass
+
+    def reset(self): #-> int32 np.ndarray with one row per agent
+        """
+        Sets the application to its initial conditions
+
+        Sets each agent's state to the default observation and returns those states stacked as an int32 array
+        """
+        obs_arr =[]
+        for i in range(len(self.policy_agents)):
+            self.policy_agents[i].state = np.array(
+                                    self.get_agent_default_observation_array(), 
+                                     dtype=np.int32)
+            obs_arr.append(self.policy_agents[i].state)
+            self.policy_agents[i].reward = 0
+            self.policy_agents[i].info = {}
+            self.policy_agents[i].done = False
+            self.policy_agents[i].vendingPrice = 0
+            self.policy_agents[i].quantitySold = 0
+            self.policy_agents[i].vendCost = 1  # NOTE: reset_values() uses 5 and runs in step() before any sale
+
+        
+        self.consumerMoneyEach = self.consumer_total_money_per_turn / self.num_consumers
+        # for i in range(len(self.consumers_arr)):
+            # self.clear_consumer_stats(self.consumers_arr[i])
+            # self.consumers_arr[i].money = consumerMoneyEach
+
+        self.environment_timesteps = self.environment_starter_timesteps
+        
+        return np.array(obs_arr).astype(np.int32)
+
+
+    def get_agent_default_observation_array(self):
+        """
+        Gets a default (all-zero, 3-element) observation row for one agent
+        """
+        return [0.0, 0.0, 0]# , self.env_wholesale_price]
\ No newline at end of file