# Room-AI/orchestrate.py (kxerxess/Room-AI, master branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from environment import Room
from agent_randomaf import RandomAF
from agent_analyst import Analyst
from agent_gambler import Gambler
from agent_pintu import Pintu
import numpy as np
import matplotlib.pyplot as plt
import time


def main(*, agent=None, room=None, iterations=10000, graph_interval=1,
         show_plot=True):
    """Run an agent in the room and plot its cumulative reward.

    Each step reads ``room.state``, asks the agent for an action via
    ``agent.get_next_action``, applies it with ``room.take_action`` and feeds
    the resulting transition to ``agent.update``.  The running reward total
    is sampled every ``graph_interval`` steps, printed once the loop ends
    and, unless disabled, shown as a scatter plot.

    With no arguments: a fresh ``Gambler`` in a fresh ``Room``, 10000 steps,
    every step plotted.

    Args:
        agent: Object providing ``get_next_action(state)`` and
            ``update(old_state, action, reward, new_state)``.  Defaults to a
            new ``Gambler``.
        room: Object providing a ``state`` attribute and
            ``take_action(action)`` returning ``(new_state, reward)``.
            Defaults to a new ``Room``.
        iterations: Number of steps to run.
        graph_interval: Record one plot point every this many steps.
        show_plot: When false, skip the matplotlib plot entirely.

    Raises:
        ValueError: If ``graph_interval`` is less than 1.
    """
    if graph_interval < 1:
        raise ValueError("graph_interval must be a positive integer")

    if agent is None:
        agent = Gambler()
    if room is None:
        room = Room()

    total_reward = 0
    graph_step = []
    graph_score = []

    for step in range(iterations):
        # NOTE(review): this keeps a reference, not a copy. If Room updates
        # its state object in place, old_state would alias new_state by the
        # time agent.update() runs -- confirm against Room.take_action.
        old_state = room.state
        action = agent.get_next_action(old_state)
        new_state, reward = room.take_action(action)
        agent.update(old_state, action, reward, new_state)

        total_reward += reward

        if step % graph_interval == 0:
            graph_step.append(step)
            graph_score.append(total_reward)

    print('Total Reward:', total_reward)

    if show_plot:
        # s=2 is the marker size: small dots keep a dense plot readable.
        plt.scatter(graph_step, graph_score, s=2)
        plt.show()


# Run the simulation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()