-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest.py
More file actions
72 lines (53 loc) · 2.48 KB
/
test.py
File metadata and controls
72 lines (53 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pickle
import numpy as np
import utils
# Load the precomputed reference fixtures (inputs + expected outputs per case).
# NOTE(review): pickle.load executes arbitrary code for untrusted data — fine
# for a local test fixture, but never load a pickle from an external source.
with open('test_info.pkl', 'rb') as f:
    tests_info = pickle.load(f)
# Iterating a dict yields its keys, so .keys() is redundant here.
test_cases = sorted(tests_info)
""" ------------- testing action distribution computation ----------------"""
print('-'*10 + ' testing compute_action_distribution ' + '-'*10)
for i in test_cases:
theta = tests_info[i]['theta']
phis = tests_info[i]['phis']
soln_action_dist = tests_info[i]['action_dst']
action_dist = utils.compute_action_distribution(theta, phis)
err = np.linalg.norm(soln_action_dist - action_dist)
print('test {} for compute_action_distribution - error = {}'.format(i, err))
""" ------------- testing compute_log_softmax_grad ----------------"""
print('-' * 10 + ' testing compute_log_softmax_grad ' + '-' * 10)
for i in test_cases:
theta = tests_info[i]['theta']
phis = tests_info[i]['phis']
action = tests_info[i]['action']
soln_grad = tests_info[i]['grad']
grad = utils.compute_log_softmax_grad(theta, phis, action)
err = np.linalg.norm(soln_grad - grad)
print('test {} for compute_log_softmax_grad - error = {}'.format(i, err))
""" ------------- testing compute_fisher_matrix ----------------"""
print('-' * 10 + ' testing compute_fisher_matrix ' + '-' * 10)
for i in test_cases:
total_grads = tests_info[i]['total_grads']
total_rewards = tests_info[i]['total_rewards']
soln_fisher = tests_info[i]['fisher']
fisher = utils.compute_fisher_matrix(total_grads)
err = np.linalg.norm(soln_fisher - fisher)
print('test {} for compute_fisher_matrix - error = {}'.format(i, err))
""" ------------- testing compute_value_gradient ----------------"""
print('-' * 10 + ' testing compute_value_gradient ' + '-' * 10)
for i in test_cases:
total_grads = tests_info[i]['total_grads']
total_rewards = tests_info[i]['total_rewards']
soln_v_grad = tests_info[i]['v_grad']
v_grad = utils.compute_value_gradient(total_grads, total_rewards)
err = np.linalg.norm(soln_v_grad - v_grad)
print('test {} for compute_value_gradient - error = {}'.format(i, err))
""" ------------- testing compute_value_gradient ----------------"""
print('-' * 10 + ' testing compute_value_gradient ' + '-' * 10)
for i in test_cases:
fisher = tests_info[i]['fisher']
delta = 1e-2
v_grad = tests_info[i]['v_grad']
soln_eta = tests_info[i]['eta']
eta = utils.compute_eta(delta, fisher, v_grad)
err = np.linalg.norm(soln_eta - eta)
print('test {} for compute_eta - error = {}'.format(i, err))