-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest.py
More file actions
72 lines (53 loc) · 2.48 KB
/
test.py
File metadata and controls
72 lines (53 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pickle
import numpy as np
import utils
# Load the precomputed reference fixtures (inputs + expected outputs per case).
# NOTE(review): pickle.load executes arbitrary code for untrusted data — fine
# for a local test fixture, but never load a pickle from an external source.
with open('test_info.pkl', 'rb') as f:
    tests_info = pickle.load(f)
# Iterating a dict yields its keys, so .keys() is redundant here.
test_cases = sorted(tests_info)
""" ------------- testing action distribution computation ----------------"""
print('-'*10 + ' testing compute_action_distribution ' + '-'*10)
for i in test_cases:
theta = tests_info[i]['theta']
phis = tests_info[i]['phis']
soln_action_dist = tests_info[i]['action_dst']
action_dist = utils.compute_action_distribution(theta, phis)
err = np.linalg.norm(soln_action_dist - action_dist)
print('test {} for compute_action_distribution - error = {}'.format(i, err))
""" ------------- testing compute_log_softmax_grad ----------------"""
print('-' * 10 + ' testing compute_log_softmax_grad ' + '-' * 10)
for i in test_cases:
theta = tests_info[i]['theta']
phis = tests_info[i]['phis']
action = tests_info[i]['action']
soln_grad = tests_info[i]['grad']
grad = utils.compute_log_softmax_grad(theta, phis, action)
err = np.linalg.norm(soln_grad - grad)
print('test {} for compute_log_softmax_grad - error = {}'.format(i, err))
""" ------------- testing compute_fisher_matrix ----------------"""
print('-' * 10 + ' testing compute_fisher_matrix ' + '-' * 10)
for i in test_cases:
total_grads = tests_info[i]['total_grads']
total_rewards = tests_info[i]['total_rewards']
soln_fisher = tests_info[i]['fisher']
fisher = utils.compute_fisher_matrix(total_grads)
err = np.linalg.norm(soln_fisher - fisher)
print('test {} for compute_fisher_matrix - error = {}'.format(i, err))
""" ------------- testing compute_value_gradient ----------------"""
print('-' * 10 + ' testing compute_value_gradient ' + '-' * 10)
for i in test_cases:
total_grads = tests_info[i]['total_grads']
total_rewards = tests_info[i]['total_rewards']
soln_v_grad = tests_info[i]['v_grad']
v_grad = utils.compute_value_gradient(total_grads, total_rewards)
err = np.linalg.norm(soln_v_grad - v_grad)
print('test {} for compute_value_gradient - error = {}'.format(i, err))
""" ------------- testing compute_value_gradient ----------------"""
print('-' * 10 + ' testing compute_value_gradient ' + '-' * 10)
for i in test_cases:
fisher = tests_info[i]['fisher']
delta = 1e-2
v_grad = tests_info[i]['v_grad']
soln_eta = tests_info[i]['eta']
eta = utils.compute_eta(delta, fisher, v_grad)
err = np.linalg.norm(soln_eta - eta)
print('test {} for compute_eta - error = {}'.format(i, err))