diff --git a/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth b/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth new file mode 100644 index 0000000..e4eeaf0 Binary files /dev/null and b/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth differ diff --git a/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth b/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth new file mode 100644 index 0000000..6d42660 Binary files /dev/null and b/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth differ diff --git a/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt b/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt new file mode 100644 index 0000000..1f08793 --- /dev/null +++ b/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with No Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.0 +alpha_x: 0.0 +alpha_y: 0.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.377726 +VY: 3.5543969 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.881e+00 2.070e+01 2.088e+02 2.090e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 
3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -1.3008118e-03 8.5599422e-03 + 0.0000000e+00] +planet_radii: [695700.] +a: 2.3249498e+08 +e: 9.084384e-08 +w: 170.61444 +theta: 179.25969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt b/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt new file mode 100644 index 0000000..1f08793 --- /dev/null +++ b/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with No Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.0 +alpha_x: 0.0 +alpha_y: 0.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.377726 +VY: 3.5543969 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.881e+00 2.070e+01 2.088e+02 2.090e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -1.3008118e-03 8.5599422e-03 + 0.0000000e+00] +planet_radii: [695700.] 
+a: 2.3249498e+08 +e: 9.084384e-08 +w: 170.61444 +theta: 179.25969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt b/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt new file mode 100644 index 0000000..e010724 --- /dev/null +++ b/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with Thrust Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 1.0 +alpha_x: 1.0 +alpha_y: 1.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.378437 +VY: 3.555108 +m: 3365.874 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.866e+00 2.053e+01 2.071e+02 2.073e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.555e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -5.8937073e-04 9.2711449e-03 + -1.2597656e-01] +planet_radii: [695700.] 
+a: 2.3251091e+08 +e: 7.2937924e-05 +w: 28.867548 +theta: 339.77762 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt b/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt new file mode 100644 index 0000000..e010724 --- /dev/null +++ b/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with Thrust Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 1.0 +alpha_x: 1.0 +alpha_y: 1.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.378437 +VY: 3.555108 +m: 3365.874 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.866e+00 2.053e+01 2.071e+02 2.073e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.555e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -5.8937073e-04 9.2711449e-03 + -1.2597656e-01] +planet_radii: [695700.] 
+a: 2.3251091e+08 +e: 7.2937924e-05 +w: 28.867548 +theta: 339.77762 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth b/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth new file mode 100644 index 0000000..6017479 Binary files /dev/null and b/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth differ diff --git a/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt b/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt new file mode 100644 index 0000000..2208fb4 --- /dev/null +++ b/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt @@ -0,0 +1,77 @@ +Test Environment Step with Neural Network Thrust Action +Environment has been reset +Seed: 42 +Neural Net loaded from: C:\Users\micha\MSI_Data\Masters_Thesis\astro_compass\data\test_data\test_env_step_with_nn_action\nn_controller_weights_smoothed_full_10e3_epochs.pth +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.28366512 +alpha_x: -0.22682723 +alpha_y: -0.17161115 + + +Observation: +X: 3.494738e+07 +Y: -2.2985344e+08 +VX: 23.376595 +VY: 3.553541 +m: 3365.964 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.900e+00 2.090e+01 2.109e+02 2.111e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 
3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.550e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4160000e+04 1.2784000e+04 -2.4318695e-03 7.7040195e-03 + -3.5888672e-02] +planet_radii: [695700.] +a: 2.3247046e+08 +e: 0.00010934778 +w: 156.20451 +theta: 164.84969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt b/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt new file mode 100644 index 0000000..2208fb4 --- /dev/null +++ b/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt @@ -0,0 +1,77 @@ +Test Environment Step with Neural Network Thrust Action +Environment has been reset +Seed: 42 +Neural Net loaded from: C:\Users\micha\MSI_Data\Masters_Thesis\astro_compass\data\test_data\test_env_step_with_nn_action\nn_controller_weights_smoothed_full_10e3_epochs.pth +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.28366512 +alpha_x: -0.22682723 +alpha_y: -0.17161115 + + +Observation: +X: 3.494738e+07 +Y: -2.2985344e+08 +VX: 23.376595 +VY: 3.553541 +m: 3365.964 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.900e+00 2.090e+01 2.109e+02 2.111e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 
3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.550e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4160000e+04 1.2784000e+04 -2.4318695e-03 7.7040195e-03 + -3.5888672e-02] +planet_radii: [695700.] +a: 2.3247046e+08 +e: 0.00010934778 +w: 156.20451 +theta: 164.84969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/src/python/Constants.py b/src/python/Constants.py index 8498b7c..675abbb 100644 --- a/src/python/Constants.py +++ b/src/python/Constants.py @@ -1,5 +1,7 @@ class Constants: - RADIUS_SUN_M = 6.957e8 - RADIUS_EARTH_M = 6.378e6 + RADIUS_SUN_M = 6.957e8 # m + RADIUS_EARTH_M = 6.378e6 # m MU_SUN = 1.3e11 + MU_SUN_M = 1.3e20 # m^3/s^2 G0 = 9.80665 # m/s^2 + G0_KM = G0 / 1000 diff --git a/src/python/Ephemeris.py b/src/python/Ephemeris.py index 988df60..0be8921 100644 --- a/src/python/Ephemeris.py +++ b/src/python/Ephemeris.py @@ -23,7 +23,7 @@ def __init__(self): # initialize an empty ephemeris object self.reset() - def add_data(self, et, x, y, vx, vy, m, alpha_x, alpha_y, u): + def add_data(self, et, x, y, vx, vy, m, alpha_x=0.0, alpha_y=0.0, u=0.0): self.arr_et = np.append(self.arr_et, et) self.arr_x = np.append(self.arr_x, x) self.arr_y = np.append(self.arr_y, y) @@ -35,7 +35,9 @@ def add_data(self, et, x, y, vx, vy, m, alpha_x, alpha_y, u): self.arr_u = np.append(self.arr_u, u) self.num_vectors = self.num_vectors + 1 - def add_polar_data(self, et, r, theta, r_dot, v_theta, m, alpha_x, alpha_y, u): + def add_polar_data( + self, et, r, theta, r_dot, v_theta, m, alpha_x=0.0, alpha_y=0.0, u=0.0 + ): # convert polar coordinates to cartesian x = r * np.cos(theta) y = r * np.sin(theta) @@ -150,7 +152,7 @@ def plot_xy_ref_orbit(self, orbit_sma, label): def plot_all_ephemeris_data(self): figs = [] - + fig, ax = plot.subplots(figsize=(6, 6)) ax.plot(self.arr_et, self.arr_m, label="Spacecraft Mass") 
ax.set_title("Spacecraft Mass over Time") @@ -181,8 +183,8 @@ def plot_all_ephemeris_data(self): ax.legend(loc="lower right") plot.show() figs.append(fig) - - return figs; + + return figs def write_to_file(self, file_path, mod_vector_write_frequency=1): file_name_base = os.path.basename(file_path) diff --git a/src/python/NN_Utils.py b/src/python/NN_Utils.py index e63f11c..9951ec9 100644 --- a/src/python/NN_Utils.py +++ b/src/python/NN_Utils.py @@ -1,6 +1,7 @@ import torch import matplotlib.pyplot as plt import numpy as np +import os from StateVectorUtilities import non_dimensionalize from torch.utils.data import TensorDataset, random_split @@ -40,7 +41,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_u.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_u.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -51,7 +52,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_x.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_x.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -62,7 +63,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_y.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_y.jpg")) elif params["control_data_set"] == "u": @@ -79,7 +80,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_u.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_u.jpg")) elif params["control_data_set"] == "alpha": @@ -92,7 +93,9 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_x$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + 
"nn_val_compare_alpha_x.jpg") # Vector format + fig.savefig( + os.path.join(dir_plots, "nn_val_compare_alpha_x.jpg") + ) # Vector format fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -103,7 +106,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_y.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_y.jpg")) else: raise Exception( @@ -127,7 +130,7 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): vector = sample_ephem_compare.get_vector_at_index(i) - arr_control = query_NN_at_ephem_state(NN_TBT, vector, params) + arr_control = query_NN_at_state(NN_TBT, vector, params) if params["control_data_set"] == "all": arr_u_nn.append(arr_control[0]) @@ -158,7 +161,7 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): ax.set_ylabel("Ephemeris Throttle u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_ephem_compare_u.jpg") + fig.savefig(os.path.join(dir_plots, "nn_ephem_compare_u.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.plot( @@ -191,19 +194,19 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): ax.set_ylabel("Ephemeris Thrust Direction") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_ephem_compare_alpha.jpg") + fig.savefig(os.path.join(dir_plots, "nn_ephem_compare_alpha.jpg")) -def query_NN_at_ephem_state(NN_TBT, vector, params): +def query_NN_at_state(NN_TBT, vector, params): # unpack components of interest - x = vector[1] - y = vector[2] - vx = vector[3] - vy = vector[4] - m = vector[5] + x = vector[0] + y = vector[1] + vx = vector[2] + vy = vector[3] + m = vector[4] - # pack into an array + # pack into a new array state = [x, y, vx, vy, m] # non-dimensionalize the state vector @@ -228,7 +231,7 @@ def query_NN_at_ephem_state(NN_TBT, vector, params): # if we are using BCE with logits, convert to a thrust action # 
otherwise the NN directly outputs the control action - if params["loss"] == "BCEWithLogitsLoss": + if "loss" in params and (params["loss"] == "BCEWithLogitsLoss"): probs = torch.sigmoid(nn_output) nn_control = (probs > 0.5).float() else: diff --git a/src/python/Propagation.py b/src/python/Propagation.py index fc706e0..7d2c33b 100644 --- a/src/python/Propagation.py +++ b/src/python/Propagation.py @@ -489,3 +489,71 @@ def smoothing_function_homotopic(rho, eps, flag_constrain_u): u_smooth = 1 / 2 + (rho / 2 * eps) return u_smooth + + +def env_EOM_TBT_v2(t, state, params): + """ + Two-Body Orbit Transfer Gym Environment ode propagation function + ----------------------------------------------------------------------------------- + This is a set of equations of motion that govern a spacecraft in the + 2-dimensional two-body problem, formatted specifically to the Two-Body + Transfer gym environment + + Inputs + ----------------------------------------------------------------------------------- + t: Elapsed time + state: Input state vector (x,y,vx,vy,m) + params: The list of parameters () + + Outputs + ----------------------------------------------------------------------------------- + Derivatives for state vector dy + """ + + # get the number of parameters + num_params = len(params) + + # check parameter length + if num_params != 7: + raise Exception("Invalid number of parameters") + + # unpack the state vector + x, y, vx, vy, m = state[:5] + + # unpack the parameters + mu = params[0] # gravitational parameter of the central body + T_max = params[1] # max thrust of the spacecraft + ISP = params[2] # spacecraft specific impulse + g0 = params[3] # acceleration at Earth surface + u = params[4] # spacecraft throttle control + alpha_x = params[5] # thrust x direction + alpha_y = params[6] # thrust y direction + + # create vectors + r_vec = np.array([x, y]) + v_vec = np.array([vx, vy]) + alpha_vec = np.array([alpha_x, alpha_y]) + + # enfore unit vector + if (alpha_x**2 + 
alpha_y**2) != 0.0: + alpha_vec = alpha_vec / (alpha_x**2 + alpha_y**2) + + # Derivative calculation preliminaries + r = np.linalg.norm(r_vec) + + # state vector derivative calculations + dr_vec = v_vec + dv_vec = -mu / r**3 * r_vec + u * T_max / m * alpha_vec + dm = -T_max * u / ISP / g0 + + derivs = np.array( + [ + dr_vec[0], + dr_vec[1], + dv_vec[0], + dv_vec[1], + dm, + ] + ) + + return derivs diff --git a/src/python/Spacecraft.py b/src/python/Spacecraft.py index 71be4ea..c37b0bc 100644 --- a/src/python/Spacecraft.py +++ b/src/python/Spacecraft.py @@ -1,16 +1,17 @@ import numpy as np +from StateVectorUtilities import polar_to_cartesian, cartesian_to_polar class Spacecraft: def __init__(self, r, theta, r_dot, v_theta, mass, C1, C2): # Initialize the state of the spacecraft - self.update_state(r, theta, r_dot, v_theta, mass) + self.update_state_polar(r, theta, r_dot, v_theta, mass) # Set propulsion parameters self.max_thrust = C1 self.specific_impulse = C2 - def update_state(self, r, theta, r_dot, v_theta, mass): + def update_state_polar(self, r, theta, r_dot, v_theta, mass): # Set state vector polar coordinates coordinates self.r = r self.theta = theta @@ -21,10 +22,7 @@ def update_state(self, r, theta, r_dot, v_theta, mass): self.mass = mass # convert polar coordinates to cartesian - x = r * np.cos(theta) - y = r * np.sin(theta) - vx = r_dot * np.cos(theta) - v_theta * r * np.sin(theta) - vy = r_dot * np.sin(theta) + v_theta * r * np.cos(theta) + x, y, vx, vy = polar_to_cartesian(r, theta, r_dot, v_theta) # Set state vector cartesian coordinates self.x = x @@ -32,6 +30,22 @@ def update_state(self, r, theta, r_dot, v_theta, mass): self.vx = vx self.vy = vy + def update_state_cartesian(self, x, y, vx, vy, m): + + self.x = x + self.y = y + self.vx = vx + self.vy = vy + self.mass = m + + r, theta, rdot, v_theta = cartesian_to_polar(x, y, vx, vy) + + # update polar coordinates + self.r = r + self.theta = theta + self.r_dot = rdot + self.v_theta = v_theta + def 
calc_Planar_OE(self, x_cb, y_cb, vx_cb, vy_cb, mu_cb): # determine coordinates relative to central body x_rel = self.x - x_cb @@ -101,3 +115,7 @@ def calc_Planar_OE(self, x_cb, y_cb, vx_cb, vy_cb, mu_cb): theta = 2 * np.pi - np.acos(dotp) return a, e, w, theta + + def get_cartesian_state(self): + + return self.x, self.y, self.vx, self.vy diff --git a/src/python/TwoBody_Orb2Orb_Transfer_Env.py b/src/python/TwoBody_Orb2Orb_Transfer_Env.py index 4f1260e..03242b3 100644 --- a/src/python/TwoBody_Orb2Orb_Transfer_Env.py +++ b/src/python/TwoBody_Orb2Orb_Transfer_Env.py @@ -4,7 +4,8 @@ from scipy.integrate import solve_ivp from Constants import Constants from Spacecraft import Spacecraft -from Propagation import spacecraft_EOM_radial_2D_EB +from Propagation import env_EOM_TBT_v2 +from StateVectorUtilities import polar_to_cartesian class TwoBody_Orb2Orb_Transfer_Env(gym.Env): @@ -26,16 +27,19 @@ def __init__(self): # list of environment parameters (Sun is the central body) self.arr_mu = np.array([Constants.MU_SUN]) # solar mu [km^3/s^2] - self.planet_radii = np.array([Constants.RADIUS_SUN_M/1000]) # solar radius [km] + self.planet_radii = np.array( + [Constants.RADIUS_SUN_M / 1000] + ) # solar radius [km] self.elapsed_t = 0.0 self.step_size = 3600.0 # define the action space - # The action space consists of two variables: + # The action space consists of three variables: # 1) a control throlle input (scaled from 0 to 1) - # 2) a thrust vector (0 to 2pi) - low_array_action = np.array([0.0, 0.0], dtype=np.float32) - high_array_action = np.array([1.0, 2 * np.pi], dtype=np.float32) + # 2) a thrust direction x vector component (ranges -1 to 1) + # 3) a thrust direction y vector component (ranges -1 to 1) + low_array_action = np.array([0.0, -1.0, -1.0], dtype=np.float32) + high_array_action = np.array([1.0, 1.0, 1.0], dtype=np.float32) self.action_space = gym.spaces.Box(low=low_array_action, high=high_array_action) def _get_info(self, ode_solution, delta_r): @@ -80,10 +84,11 @@ 
def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): vx_cb = 0.0 vy_cb = 0.0 + # convert to cartesian coordinates with random theta as input + x, y, vx, vy = polar_to_cartesian(r, theta, r_dot, v_theta) + # set the initial state of the environment - self._state = np.array( - [r, theta, r_dot, v_theta, mass, mu, sma_target], dtype=np.float32 - ) + self._state = np.array([x, y, vx, vy, mass, mu, sma_target], dtype=np.float32) # set the location of the central body self._arr_cb = np.array([x_cb, y_cb, vx_cb, vy_cb], dtype=np.float32) @@ -155,10 +160,10 @@ def _apply_dV_in_VNB_frame(self, dV, X_i, Y_i, VX_i, VY_i): def step(self, action): # unpack the state vector - r = self._state[0] - theta = self._state[1] - r_dot = self._state[2] - v_theta = self._state[3] + x = self._state[0] + y = self._state[1] + vx = self._state[2] + vy = self._state[3] mass = self._state[4] mu = self._state[5] sma_target = self._state[6] @@ -174,27 +179,27 @@ def step(self, action): # unpack the action vector u = action[0] # throttle control - beta = action[1] # spacecraft attitude + alpha_x = action[1] # spacecraft thrust x-direction + alpha_y = action[2] # spacecraft thrust y-direction # step the spacecraft forward t_span = (0.0, self.step_size) - y0 = np.array([r, theta, r_dot, v_theta, mass]) + y0 = np.array([x, y, vx, vy, mass]) params = np.array( [ self.arr_mu[0], - self.planet_radii[0], sc.max_thrust, sc.specific_impulse, + Constants.G0_KM, u, - beta, + alpha_x, + alpha_y, ], dtype=np.float32, ) # solve ODE - solution = solve_ivp( - spacecraft_EOM_radial_2D_EB, t_span, y0, method="RK45", args=(params,) - ) + solution = solve_ivp(env_EOM_TBT_v2, t_span, y0, method="RK45", args=(params,)) # extract the final state vector from ODE solution (last column in y) y_final = (solution.y[:, -1]).astype(np.float32) @@ -205,10 +210,9 @@ def step(self, action): # update the state and elapsed time self.elapsed_t = self.elapsed_t + self.step_size self._state = np.append(y_final, 
[mu, sma_target]) - # self._state[4] and self._state[5] are constant # update the spacecraft object - sc.update_state(*y_final) + sc.update_state_cartesian(*y_final) # update the environment spacecraft object self._spacecraft = sc diff --git a/src/scripts/train_alpha_network.py b/src/scripts/train_alpha_network.py index dd78452..2e33312 100644 --- a/src/scripts/train_alpha_network.py +++ b/src/scripts/train_alpha_network.py @@ -40,11 +40,11 @@ def train_u_network(): # parameters params = { "training_data_pts": 1000, # training data batch size - "training_epochs": 1000, # number of training epochs to run + "training_epochs": 100, # number of training epochs to run "patience": 200000, # Epochs without training loss improvement to stop training "learning_rate_i": 0.1, # Initial Parameter learning rate "learning_rate_f": 0.1, # Final Parameter learning rate - "plot_update": 1000, # Number of epochs before plot is updated + "plot_update": 100, # Number of epochs before plot is updated "report_update": 1, # Number of epochs between reporting training status "train_fraction": 0.8, # Fraction of data to use for training "eval_fraction": 0.2, # Fraction of data to use for eval @@ -65,14 +65,15 @@ def train_u_network(): # paths print("Current wd: " + os.getcwd()) dir_training_dir = ( - "..\\data\\training_ephems\\test_set_bang_bang\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) dir_plots = "..\\data\\plots\\" # path for storing plot data dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) path_nn = os.path.normpath(os.path.join(os.getcwd(), dir_nn)) - path_plot_nn_training = path_plots + "nn_training.jpg" + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = 
os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -186,7 +187,7 @@ def train_u_network(): ) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_u_network() diff --git a/src/scripts/train_neural_network.py b/src/scripts/train_neural_network.py index 7e0d5a9..7a01cad 100644 --- a/src/scripts/train_neural_network.py +++ b/src/scripts/train_neural_network.py @@ -14,7 +14,7 @@ from gymnasium.envs.registration import register from torch.utils.data import DataLoader from torch.optim.lr_scheduler import CosineAnnealingLR -from Neural_Net_Controller import NN_TBT_Controller +from Neural_Net_Controllers import NN_TBT_Controller from Training_Data_Generation import read_ephems_from_dir from Constants import Constants from Plotting_Utils import format_plots, plot_training_loss @@ -38,19 +38,19 @@ def train_neural_network(): # parameters - training_data_pts = 1000 # training data batch size - training_epochs = 100000 # number of training epochs to run - min_mse = 999999 # min mse init value - patience = 200000 # If the number of iterations since min is greater than this number - training ends - learning_rate_i = 0.1 # Initial Parameter learning rate - learning_rate_f = 0.1 # Final Parameter learning rate - plot_update = training_epochs # Number of epochs before plot is updated - report_update = 1000 # Number of epochs between reporting training status - train_fraction = 0.8 # Fraction of data to use for training - eval_fraction = 0.2 # Fraction of data to use for eval - annealing_tmax = 1000 - params = { + "training_data_pts": 1000, # training data batch size + "training_epochs": 1000, # number of training epochs to run + "patience": 200000, # Epochs without training loss improvement to stop training + "learning_rate_i": 0.4, # Initial Parameter learning rate + "learning_rate_f": 0.4, # Final Parameter learning rate + "plot_update": 1000, # Number of 
epochs before plot is updated + "report_update": 10, # Number of epochs between reporting training status + "train_fraction": 0.8, # Fraction of data to use for training + "eval_fraction": 0.2, # Fraction of data to use for eval + "annealing_tmax": 1000, # Cosine annealing max iters + "loss": "MSE", # MSE, BCEWithLogitsLoss + "control_data_set": "all", # Control data sets to train (all, u, alpha) "mu": Constants.MU_SUN * 10 ** (9), # sun mu [m^3/s^2] "max_T": 1.33, # max spacecraft thrust [N] "ISP": 3872.0, # spacecraft specific impulse [s] @@ -64,14 +64,15 @@ def train_neural_network(): # paths dir_training_dir = ( - "\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) - dir_plots = "\\data\\plots\\" # path for storing plot data - dir_nn = "\\data\\neural_networks\\" # path for saving trained nn - path_training_dir = os.getcwd() + dir_training_dir - path_plots = os.getcwd() + dir_plots - path_nn = os.getcwd() + dir_nn - path_plot_nn_training = path_plots + "nn_training.jpg" + dir_plots = "..\\data\\plots\\" # path for storing plot data + dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn + path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) + path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) + path_nn = os.path.normpath(os.path.join(os.getcwd(), dir_nn)) + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -86,11 +87,11 @@ def train_neural_network(): # establish optimizer # optimizer = torch.optim.Adam(NN_TBT.parameters(), lr=learning_rate_i) - optimizer = torch.optim.SGD(NN_TBT.parameters(), lr=learning_rate_i) + optimizer = torch.optim.SGD(NN_TBT.parameters(), lr=params["learning_rate_i"]) # define a LR scheduler scheduler = CosineAnnealingLR( - 
optimizer, T_max=annealing_tmax, eta_min=learning_rate_f + optimizer, T_max=params["annealing_tmax"], eta_min=params["learning_rate_f"] ) # read ephemeris files @@ -102,9 +103,7 @@ def train_neural_network(): print(str(num_ephems * set_ephems[0].num_vectors) + " training data points") print("Number of Neural Network Parameters: " + str(num_p)) - train_dataset, val_dataset = pre_process_training_data( - set_ephems, train_fraction, eval_fraction, params - ) + train_dataset, val_dataset = pre_process_training_data( set_ephems, params ) # Training # -------------------------------------------------------------------------------------------------------- @@ -122,10 +121,10 @@ def train_neural_network(): NN_TBT.train() # using torch loader object to load training and eval data - train_loader = DataLoader(train_dataset, batch_size=training_data_pts, shuffle=True) - val_loader = DataLoader(val_dataset, batch_size=training_data_pts, shuffle=False) + train_loader = DataLoader(train_dataset, batch_size=params["training_data_pts"], shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=params["training_data_pts"], shuffle=False) - while epoch <= training_epochs: + while epoch <= params["training_epochs"]: # perform training epoch NN_TBT, avg_train_loss = training_epoch( @@ -138,7 +137,7 @@ def train_neural_network(): i_at_min = epoch # exit condition - if epoch > patience + i_at_min: + if epoch > params["patience"] + i_at_min: print("Patience Criterion reached, exiting training") flag_exit = True break @@ -147,7 +146,7 @@ def train_neural_network(): break # eval NN - if epoch % plot_update == 0: + if epoch % params["plot_update"] == 0: params["flag_plot"] = True else: params["flag_plot"] = False @@ -161,23 +160,23 @@ def train_neural_network(): arr_loss_train.append(avg_train_loss) arr_loss.append(avg_loss_val) - if epoch % report_update == 0: + if epoch % params["report_update"] == 0: print( - f"Epoch [{epoch}/{training_epochs}], Training Loss: {avg_train_loss:.4e}, 
Eval loss: {avg_loss_val:.4e} Min loss: {min_mse:.4e} last min: {epoch - i_at_min} lr: {scheduler.get_last_lr()[0]:.4e}" + f"Epoch [{epoch}/{params['training_epochs']}], Training Loss: {avg_train_loss:.4e}, Eval loss: {avg_loss_val:.4e} Min loss: {min_mse:.4e} last min: {epoch - i_at_min} lr: {scheduler.get_last_lr()[0]:.4e}" ) - if epoch % plot_update == 0: + if epoch % params["plot_update"] == 0: plot_training_loss( - arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training + arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training, params ) epoch = epoch + 1 # final training plot update - plot_training_loss(arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training) + plot_training_loss(arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training, params) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_neural_network() diff --git a/src/scripts/train_u_network.py b/src/scripts/train_u_network.py index 3ce14d1..2ecbf6c 100644 --- a/src/scripts/train_u_network.py +++ b/src/scripts/train_u_network.py @@ -63,15 +63,17 @@ def train_u_network(): # paths + print("Current wd: " + os.getcwd()) dir_training_dir = ( - "\\data\\training_ephems\\test_set_bang_bang\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) - dir_plots = "\\data\\plots\\" # path for storing plot data - dir_nn = "\\data\\neural_networks\\" # path for saving trained nn - path_training_dir = os.getcwd() + dir_training_dir - path_plots = os.getcwd() + dir_plots - path_nn = os.getcwd() + dir_nn - path_plot_nn_training = path_plots + "nn_training.jpg" + dir_plots = "..\\data\\plots\\" # path for storing plot data + dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn + path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) + path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) + path_nn = 
os.path.normpath(os.path.join(os.getcwd(), dir_nn)) + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -186,7 +188,7 @@ def train_u_network(): ) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_u_network() diff --git a/src/tests/TBO_Transfer_Env_Test.py b/src/tests/TBO_Transfer_Env_Test.py index 243e276..403d049 100644 --- a/src/tests/TBO_Transfer_Env_Test.py +++ b/src/tests/TBO_Transfer_Env_Test.py @@ -1,6 +1,5 @@ import numpy as np import gymnasium as gym -import matplotlib.pyplot as plot import sys import os @@ -8,7 +7,9 @@ from gymnasium.envs.registration import register # Adding python src code directory -sys.path.append(os.path.abspath("../python")) +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) from Ephemeris import Ephemeris @@ -25,7 +26,7 @@ env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") -steps_per_traj = 365 * 24 * 3 +steps_per_traj = 86400 * 365 * 1.1 / 3600 num_traj = 1 @@ -35,21 +36,20 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): arr_reward_totals = np.array([]) total_steps_in_env = 0 - for count_traj in range(0, num_traj): + for count_traj in range(0, num_trajectories): # reset the environment steps = 0 r_tot = 0.0 eph = Ephemeris() - observation, info = env.reset() + observation, info = env.reset(seed=42) while steps < steps_per_traj: - # Sample randomly from the action space. Since the action is a delta-V - # magnitude in km/s, and the action space is unbounded (-inf to inf) the - # test maneuver that is returned will be sampled from a Gaussian normal - # distribution with a mean of 0 and a standard deviation of 1. We - # devide by 1000 in this test case to give relatively small maneuvers. 
+ # Arbitrary test action action = env.action_space.sample() + action[0] = 1.0 + action[1] = -1.0 + np.tanh(steps / steps_per_traj) + action[2] = -1.0 + np.tanh(2 * steps / steps_per_traj) observation, reward, terminated, truncated, info = env.step(action) @@ -60,13 +60,19 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): if terminated: break - eph.add_polar_data( + if truncated: + break + + eph.add_data( elapsed_time, observation[0], observation[1], observation[2], observation[3], observation[4], + action[1], + action[2], + action[0], ) # print( elapsed_time, a, e, reward ) @@ -89,20 +95,9 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): if count_traj == num_traj - 1: print("Plotting last trajectory...") - fig = eph.plot_xy(info["planet_radii"]) - plot.show(fig) - - fig_reward, ax = plot.subplots(figsize=(6, 6)) - - ax.plot(arr_episodes, arr_reward_totals, label="Total Reward") - - # Customize the figure - ax.set_title("Total Reward Per Episode") - ax.set_xlabel("Episode Count") - ax.set_ylabel("Total R") - ax.legend() - ax.grid(False) - plot.show(fig_reward) + eph.plot_xy(info["planet_radii"]) + eph.plot_xy_ref_orbit(observation[6], "Earth Orbit") + eph.plot_all_ephemeris_data() print("Test successful") diff --git a/src/tests/test_env_step_no_action.py b/src/tests/test_env_step_no_action.py new file mode 100644 index 0000000..9d6a302 --- /dev/null +++ b/src/tests/test_env_step_no_action.py @@ -0,0 +1,120 @@ +import numpy as np +import gymnasium as gym +import sys +import os +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", 
+ entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log("Test Environment Step with No Action", test_log, True) + + observation_2, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # zero action + action = np.array([0.0, 0.0, 0.0]) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation_2, reward, terminated, truncated, info_2 = env.step(action) + + # update observation_2 + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = 
log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + dir_test = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_no_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(dir_test, "output_test_env_step_no_action_log.txt") + ) + path_test_truth = os.path.normpath( + os.path.join(dir_test, "truth_test_env_step_no_action_log.txt") + ) + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? 
", are_same) + + +test_env_step_no_action(env, seed_in) diff --git a/src/tests/test_env_step_with_action.py b/src/tests/test_env_step_with_action.py new file mode 100644 index 0000000..d6a55c5 --- /dev/null +++ b/src/tests/test_env_step_with_action.py @@ -0,0 +1,120 @@ +import numpy as np +import gymnasium as gym +import sys +import os +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", + entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log("Test Environment Step with Thrust Action", test_log, True) + + observation_2, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = 
log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # throttle is 1 and unit vector components are 1 and 1 (vector is normalized in "step") + action = np.array([1.0, 1.0, 1.0]) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation_2, reward, terminated, truncated, info_2 = env.step(action) + + # update observation_2 + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + dir_test = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_with_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(dir_test, "output_test_env_step_with_action_log.txt") + ) + path_test_truth = os.path.normpath( + 
os.path.join(dir_test, "truth_test_env_step_with_action_log.txt") + ) + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? ", are_same) + + +test_env_step_no_action(env, seed_in) diff --git a/src/tests/test_env_step_with_nn_action.py b/src/tests/test_env_step_with_nn_action.py new file mode 100644 index 0000000..6f7550b --- /dev/null +++ b/src/tests/test_env_step_with_nn_action.py @@ -0,0 +1,165 @@ +import gymnasium as gym +import sys +import os +import torch +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + +from NN_Utils import query_NN_at_state +from Constants import Constants +from Neural_Net_Controllers import NN_TBT_Controller + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", + entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log( + "Test Environment Step with Neural Network Thrust Action", test_log, True + ) + + # paths + path_test_dir = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_with_nn_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(path_test_dir, 
"output_test_env_step_with_nn_action_log.txt") + ) + path_test_truth = os.path.normpath( + os.path.join(path_test_dir, "truth_test_env_step_with_nn_action_log.txt") + ) + path_input_nn = os.path.normpath( + os.path.join( + path_test_dir, "nn_controller_weights_smoothed_full_10e3_epochs.pth" + ) + ) + + observation, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + # load neural network from file + nn_controller = NN_TBT_Controller() # instantiate NN object + nn_control_param_dict = torch.load( + path_input_nn + ) # load parameter dictionary from file + nn_controller.load_state_dict( + nn_control_param_dict + ) # load the state parameter dictionary + + test_log = log("Neural Net loaded from: " + str(path_input_nn), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation[0]), test_log, True) + test_log = log("Y: " + str(observation[1]), test_log, True) + test_log = log("VX: " + str(observation[2]), test_log, True) + test_log = log("VY: " + str(observation[3]), test_log, True) + test_log = log("m: " + str(observation[4]), test_log, True) + test_log = log("mu: " + str(observation[5]), test_log, True) + test_log = log("sma_target: " + str(observation[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # pack NN state + x = observation[0] + y = observation[1] + vx = observation[2] + vy = observation[3] + m = observation[4] + state = [x, y, vx, vy, m] + + # define normalization parameters + params = { + "mu": Constants.MU_SUN * 10 ** (9), # sun mu [m^3/s^2] + "max_T": 1.33, # max spacecraft thrust [N] + "ISP": 3872.0, # spacecraft specific impulse [s] + "TOF": 1.1 * 365.25 * 24 * 60 * 60, # assumed time of flight [s] + "l_star": 
149598023000, # characteristic length = Earth SMA [m] + "m_star": 3366.0, # characteristic mass = SC initial mass [kg] + "t_star": (149598023000**3 / (Constants.MU_SUN * 10 ** (9))) + ** 0.5, # characteristic time - derived + "g0": Constants.G0, # gravtational acceleration at Earth surface [m/s^2] + } + + # get action from NN + action = query_NN_at_state(nn_controller, state, params) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation, reward, terminated, truncated, info_2 = env.step(action) + + # update observation + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation[0]), test_log, True) + test_log = log("Y: " + str(observation[1]), test_log, True) + test_log = log("VX: " + str(observation[2]), test_log, True) + test_log = log("VY: " + str(observation[3]), test_log, True) + test_log = log("m: " + str(observation[4]), test_log, True) + test_log = log("mu: " + str(observation[5]), test_log, True) + test_log = log("sma_target: " + str(observation[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", 
path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? ", are_same) + + +test_env_step_no_action(env, seed_in)