Merge pull request #45 from copa-uniandes/fixes

juanfperez · web-flow · commit f35697db0ebb · 2025-04-07T07:19:08.000-05:00
Fix dtmdp tests: build action labels as strings and expect string actions in the resulting policies
diff --git a/tests/tests_dtmdp.py b/tests/tests_dtmdp.py
@@ -14,7 +14,7 @@ def test_V_value_iteration(self):
         # states:
         states = np.array([i for i in range(0,N)])
         # actions
-        actions = np.array([a for a in range(0,N)]) 
+        actions = np.array([str(a) for a in range(0,N)]) 
         # immediate returns:
         immediate_returns = np.array([[3, 1], [2, 3]])
         # discount factor:
@@ -33,7 +33,7 @@ def test_policy_value_iteration(self):
         # states:
         states = np.array([i for i in range(0,N)])
         # actions
-        actions = np.array([a for a in range(0,N)]) 
+        actions = np.array([str(a) for a in range(0,N)]) 
         # immediate returns:
         immediate_returns = np.array([[3, 1], [2, 3]])
         # discount factor:
@@ -45,14 +45,14 @@ def test_policy_value_iteration(self):
 
         mdp = dtmdp(states, actions, transition_matrices, immediate_returns, discount_factor)
         result = mdp.solve(0, minimize = True)[1]
-        self.assertEqual(result, {0: 1, 1: 0})
+        self.assertEqual(result, {0: '1', 1: '0'})
     def test_V_policy_iteration(self):
         # number of states:
         N = 2
         # states:
         states = np.array([i for i in range(0,N)])
         # actions
-        actions = np.array([a for a in range(0,N)]) 
+        actions = np.array([str(a) for a in range(0,N)]) 
         # immediate returns:
         immediate_returns = np.array([[3, 1], [2, 3]])
         # discount factor:
@@ -71,7 +71,7 @@ def test_policy_policy_iteration(self):
         # states:
         states = np.array([i for i in range(0,N)])
         # actions
-        actions = np.array([a for a in range(0,N)]) 
+        actions = np.array([str(a) for a in range(0,N)]) 
         # immediate returns:
         immediate_returns = np.array([[3, 1], [2, 3]])
         # discount factor:
@@ -83,7 +83,7 @@ def test_policy_policy_iteration(self):
 
         mdp = dtmdp(states, actions, transition_matrices, immediate_returns, discount_factor)
         result = mdp.solve(0, minimize = True)[1]
-        self.assertEqual(result, {0: 1, 1: 0})
+        self.assertEqual(result, {0: '1', 1: '0'})
         
 if __name__ == '__main__':
     unittest.main()