diff --git a/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth b/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth new file mode 100644 index 0000000..e4eeaf0 Binary files /dev/null and b/data/neural_networks/nn_controller_weights_bang_bang_alpha.pth differ diff --git a/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth b/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth new file mode 100644 index 0000000..6d42660 Binary files /dev/null and b/data/neural_networks/nn_controller_weights_bang_bang_u_only.pth differ diff --git a/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt b/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt new file mode 100644 index 0000000..1f08793 --- /dev/null +++ b/data/test_data/test_env_step_no_action/output_test_env_step_no_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with No Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.0 +alpha_x: 0.0 +alpha_y: 0.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.377726 +VY: 3.5543969 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.881e+00 2.070e+01 2.088e+02 2.090e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 
3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -1.3008118e-03 8.5599422e-03 + 0.0000000e+00] +planet_radii: [695700.] +a: 2.3249498e+08 +e: 9.084384e-08 +w: 170.61444 +theta: 179.25969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt b/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt new file mode 100644 index 0000000..1f08793 --- /dev/null +++ b/data/test_data/test_env_step_no_action/truth_test_env_step_no_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with No Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.0 +alpha_x: 0.0 +alpha_y: 0.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.377726 +VY: 3.5543969 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.881e+00 2.070e+01 2.088e+02 2.090e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -1.3008118e-03 8.5599422e-03 + 0.0000000e+00] +planet_radii: [695700.] 
+a: 2.3249498e+08 +e: 9.084384e-08 +w: 170.61444 +theta: 179.25969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt b/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt new file mode 100644 index 0000000..e010724 --- /dev/null +++ b/data/test_data/test_env_step_with_action/output_test_env_step_with_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with Thrust Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 1.0 +alpha_x: 1.0 +alpha_y: 1.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.378437 +VY: 3.555108 +m: 3365.874 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.866e+00 2.053e+01 2.071e+02 2.073e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.555e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -5.8937073e-04 9.2711449e-03 + -1.2597656e-01] +planet_radii: [695700.] 
+a: 2.3251091e+08 +e: 7.2937924e-05 +w: 28.867548 +theta: 339.77762 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt b/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt new file mode 100644 index 0000000..e010724 --- /dev/null +++ b/data/test_data/test_env_step_with_action/truth_test_env_step_with_action_log.txt @@ -0,0 +1,76 @@ +Test Environment Step with Thrust Action +Environment has been reset +Seed: 42 +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 1.0 +alpha_x: 1.0 +alpha_y: 1.0 + + +Observation: +X: 3.4947384e+07 +Y: -2.2985344e+08 +VX: 23.378437 +VY: 3.555108 +m: 3365.874 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.866e+00 2.053e+01 2.071e+02 2.073e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.551e+00 3.555e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4164000e+04 1.2784000e+04 -5.8937073e-04 9.2711449e-03 + -1.2597656e-01] +planet_radii: [695700.] 
+a: 2.3251091e+08 +e: 7.2937924e-05 +w: 28.867548 +theta: 339.77762 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth b/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth new file mode 100644 index 0000000..6017479 Binary files /dev/null and b/data/test_data/test_env_step_with_nn_action/nn_controller_weights_smoothed_full_10e3_epochs.pth differ diff --git a/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt b/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt new file mode 100644 index 0000000..2208fb4 --- /dev/null +++ b/data/test_data/test_env_step_with_nn_action/output_test_env_step_with_nn_action_log.txt @@ -0,0 +1,77 @@ +Test Environment Step with Neural Network Thrust Action +Environment has been reset +Seed: 42 +Neural Net loaded from: C:\Users\micha\MSI_Data\Masters_Thesis\astro_compass\data\test_data\test_env_step_with_nn_action\nn_controller_weights_smoothed_full_10e3_epochs.pth +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.28366512 +alpha_x: -0.22682723 +alpha_y: -0.17161115 + + +Observation: +X: 3.494738e+07 +Y: -2.2985344e+08 +VX: 23.376595 +VY: 3.553541 +m: 3365.964 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.900e+00 2.090e+01 2.109e+02 2.111e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 
3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.550e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4160000e+04 1.2784000e+04 -2.4318695e-03 7.7040195e-03 + -3.5888672e-02] +planet_radii: [695700.] +a: 2.3247046e+08 +e: 0.00010934778 +w: 156.20451 +theta: 164.84969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt b/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt new file mode 100644 index 0000000..2208fb4 --- /dev/null +++ b/data/test_data/test_env_step_with_nn_action/truth_test_env_step_with_nn_action_log.txt @@ -0,0 +1,77 @@ +Test Environment Step with Neural Network Thrust Action +Environment has been reset +Seed: 42 +Neural Net loaded from: C:\Users\micha\MSI_Data\Masters_Thesis\astro_compass\data\test_data\test_env_step_with_nn_action\nn_controller_weights_smoothed_full_10e3_epochs.pth +Observation: +X: 3.486322e+07 +Y: -2.2986622e+08 +VX: 23.379026 +VY: 3.545837 +m: 3366.0 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +Info: +Elapsed time: 0.0 +ODE Solution: None +delta_state: None +planet_radii: [695700.] +a: 2.32495e+08 +e: 1.2412671e-16 +w: 26.56505 +theta: 342.0591 +max_thrust: 0.00133 +ISP: 3872.0 + + +Action +u: 0.28366512 +alpha_x: -0.22682723 +alpha_y: -0.17161115 + + +Observation: +X: 3.494738e+07 +Y: -2.2985344e+08 +VX: 23.376595 +VY: 3.553541 +m: 3365.964 +mu: 1.3e+11 +sma_target: 1.4959802e+08 + + +reward: 0.0 +terminated: False +truncated: False + + +Info post-step: +Elapsed time: 3600.0 +ODE Solution: message: The solver successfully reached the end of the integration interval. + success: True + status: 0 + t: [ 0.000e+00 1.900e+00 2.090e+01 2.109e+02 2.111e+03 + 3.600e+03] + y: [[ 3.486e+07 3.486e+07 ... 
3.491e+07 3.495e+07] + [-2.299e+08 -2.299e+08 ... -2.299e+08 -2.299e+08] + ... + [ 3.546e+00 3.546e+00 ... 3.550e+00 3.554e+00] + [ 3.366e+03 3.366e+03 ... 3.366e+03 3.366e+03]] + sol: None + t_events: None + y_events: None + nfev: 32 + njev: 0 + nlu: 0 +delta_state: [ 8.4160000e+04 1.2784000e+04 -2.4318695e-03 7.7040195e-03 + -3.5888672e-02] +planet_radii: [695700.] +a: 2.3247046e+08 +e: 0.00010934778 +w: 156.20451 +theta: 164.84969 +max_thrust: 0.00133 +ISP: 3872.0 + + diff --git a/src/python/Constants.py b/src/python/Constants.py index 8498b7c..675abbb 100644 --- a/src/python/Constants.py +++ b/src/python/Constants.py @@ -1,5 +1,7 @@ class Constants: - RADIUS_SUN_M = 6.957e8 - RADIUS_EARTH_M = 6.378e6 + RADIUS_SUN_M = 6.957e8 # m + RADIUS_EARTH_M = 6.378e6 # m MU_SUN = 1.3e11 + MU_SUN_M = 1.3e20 # m^3/s^2 G0 = 9.80665 # m/s^2 + G0_KM = G0 / 1000 diff --git a/src/python/Ephemeris.py b/src/python/Ephemeris.py index 988df60..0be8921 100644 --- a/src/python/Ephemeris.py +++ b/src/python/Ephemeris.py @@ -23,7 +23,7 @@ def __init__(self): # initialize an empty ephemeris object self.reset() - def add_data(self, et, x, y, vx, vy, m, alpha_x, alpha_y, u): + def add_data(self, et, x, y, vx, vy, m, alpha_x=0.0, alpha_y=0.0, u=0.0): self.arr_et = np.append(self.arr_et, et) self.arr_x = np.append(self.arr_x, x) self.arr_y = np.append(self.arr_y, y) @@ -35,7 +35,9 @@ def add_data(self, et, x, y, vx, vy, m, alpha_x, alpha_y, u): self.arr_u = np.append(self.arr_u, u) self.num_vectors = self.num_vectors + 1 - def add_polar_data(self, et, r, theta, r_dot, v_theta, m, alpha_x, alpha_y, u): + def add_polar_data( + self, et, r, theta, r_dot, v_theta, m, alpha_x=0.0, alpha_y=0.0, u=0.0 + ): # convert polar coordinates to cartesian x = r * np.cos(theta) y = r * np.sin(theta) @@ -150,7 +152,7 @@ def plot_xy_ref_orbit(self, orbit_sma, label): def plot_all_ephemeris_data(self): figs = [] - + fig, ax = plot.subplots(figsize=(6, 6)) ax.plot(self.arr_et, self.arr_m, label="Spacecraft Mass") 
ax.set_title("Spacecraft Mass over Time") @@ -181,8 +183,8 @@ def plot_all_ephemeris_data(self): ax.legend(loc="lower right") plot.show() figs.append(fig) - - return figs; + + return figs def write_to_file(self, file_path, mod_vector_write_frequency=1): file_name_base = os.path.basename(file_path) diff --git a/src/python/NN_Utils.py b/src/python/NN_Utils.py index e63f11c..9951ec9 100644 --- a/src/python/NN_Utils.py +++ b/src/python/NN_Utils.py @@ -1,6 +1,7 @@ import torch import matplotlib.pyplot as plt import numpy as np +import os from StateVectorUtilities import non_dimensionalize from torch.utils.data import TensorDataset, random_split @@ -40,7 +41,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_u.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_u.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -51,7 +52,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_x.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_x.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -62,7 +63,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_y.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_y.jpg")) elif params["control_data_set"] == "u": @@ -79,7 +80,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_u.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_u.jpg")) elif params["control_data_set"] == "alpha": @@ -92,7 +93,9 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_x$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + 
"nn_val_compare_alpha_x.jpg") # Vector format + fig.savefig( + os.path.join(dir_plots, "nn_val_compare_alpha_x.jpg") + ) # Vector format fig, ax = plt.subplots(figsize=(6, 6)) ax.scatter( @@ -103,7 +106,7 @@ def evaluate_neural_network( ax.set_ylabel(r"Control deltas $\alpha_y$") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_val_compare_alpha_y.jpg") # Vector format + fig.savefig(os.path.join(dir_plots, "nn_val_compare_alpha_y.jpg")) else: raise Exception( @@ -127,7 +130,7 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): vector = sample_ephem_compare.get_vector_at_index(i) - arr_control = query_NN_at_ephem_state(NN_TBT, vector, params) + arr_control = query_NN_at_state(NN_TBT, vector, params) if params["control_data_set"] == "all": arr_u_nn.append(arr_control[0]) @@ -158,7 +161,7 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): ax.set_ylabel("Ephemeris Throttle u") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_ephem_compare_u.jpg") + fig.savefig(os.path.join(dir_plots, "nn_ephem_compare_u.jpg")) fig, ax = plt.subplots(figsize=(6, 6)) ax.plot( @@ -191,19 +194,19 @@ def compare_NN_with_ephem(NN_TBT, sample_ephem_compare, dir_plots, params): ax.set_ylabel("Ephemeris Thrust Direction") ax.legend() fig.tight_layout() - fig.savefig(dir_plots + "nn_ephem_compare_alpha.jpg") + fig.savefig(os.path.join(dir_plots, "nn_ephem_compare_alpha.jpg")) -def query_NN_at_ephem_state(NN_TBT, vector, params): +def query_NN_at_state(NN_TBT, vector, params): # unpack components of interest - x = vector[1] - y = vector[2] - vx = vector[3] - vy = vector[4] - m = vector[5] + x = vector[0] + y = vector[1] + vx = vector[2] + vy = vector[3] + m = vector[4] - # pack into an array + # pack into a new array state = [x, y, vx, vy, m] # non-dimensionalize the state vector @@ -228,7 +231,7 @@ def query_NN_at_ephem_state(NN_TBT, vector, params): # if we are using BCE with logits, convert to a thrust action # 
otherwise the NN directly outputs the control action - if params["loss"] == "BCEWithLogitsLoss": + if "loss" in params and (params["loss"] == "BCEWithLogitsLoss"): probs = torch.sigmoid(nn_output) nn_control = (probs > 0.5).float() else: diff --git a/src/python/Propagation.py b/src/python/Propagation.py index fc706e0..7d2c33b 100644 --- a/src/python/Propagation.py +++ b/src/python/Propagation.py @@ -489,3 +489,71 @@ def smoothing_function_homotopic(rho, eps, flag_constrain_u): u_smooth = 1 / 2 + (rho / 2 * eps) return u_smooth + + +def env_EOM_TBT_v2(t, state, params): + """ + Two-Body Orbit Transfer Gym Environment ode propagation function + ----------------------------------------------------------------------------------- + This is a set of equations of motion that govern a spacecraft in the + 2-dimensional two-body problem, formatted specifically to the Two-Body + Transfer gym environment + + Inputs + ----------------------------------------------------------------------------------- + t: Elapsed time + state: Input state vector (x,y,vx,vy,m) + params: The list of parameters () + + Outputs + ----------------------------------------------------------------------------------- + Derivatives for state vector dy + """ + + # get the number of parameters + num_params = len(params) + + # check parameter length + if num_params != 7: + raise Exception("Invalid number of parameters") + + # unpack the state vector + x, y, vx, vy, m = state[:5] + + # unpack the parameters + mu = params[0] # gravitational parameter of the central body + T_max = params[1] # max thrust of the spacecraft + ISP = params[2] # spacecraft specific impulse + g0 = params[3] # acceleration at Earth surface + u = params[4] # spacecraft throttle control + alpha_x = params[5] # thrust x direction + alpha_y = params[6] # thrust y direction + + # create vectors + r_vec = np.array([x, y]) + v_vec = np.array([vx, vy]) + alpha_vec = np.array([alpha_x, alpha_y]) + + # enfore unit vector + if (alpha_x**2 + 
alpha_y**2) != 0.0: + alpha_vec = alpha_vec / (alpha_x**2 + alpha_y**2) + + # Derivative calculation preliminaries + r = np.linalg.norm(r_vec) + + # state vector derivative calculations + dr_vec = v_vec + dv_vec = -mu / r**3 * r_vec + u * T_max / m * alpha_vec + dm = -T_max * u / ISP / g0 + + derivs = np.array( + [ + dr_vec[0], + dr_vec[1], + dv_vec[0], + dv_vec[1], + dm, + ] + ) + + return derivs diff --git a/src/python/Spacecraft.py b/src/python/Spacecraft.py index 71be4ea..c37b0bc 100644 --- a/src/python/Spacecraft.py +++ b/src/python/Spacecraft.py @@ -1,16 +1,17 @@ import numpy as np +from StateVectorUtilities import polar_to_cartesian, cartesian_to_polar class Spacecraft: def __init__(self, r, theta, r_dot, v_theta, mass, C1, C2): # Initialize the state of the spacecraft - self.update_state(r, theta, r_dot, v_theta, mass) + self.update_state_polar(r, theta, r_dot, v_theta, mass) # Set propulsion parameters self.max_thrust = C1 self.specific_impulse = C2 - def update_state(self, r, theta, r_dot, v_theta, mass): + def update_state_polar(self, r, theta, r_dot, v_theta, mass): # Set state vector polar coordinates coordinates self.r = r self.theta = theta @@ -21,10 +22,7 @@ def update_state(self, r, theta, r_dot, v_theta, mass): self.mass = mass # convert polar coordinates to cartesian - x = r * np.cos(theta) - y = r * np.sin(theta) - vx = r_dot * np.cos(theta) - v_theta * r * np.sin(theta) - vy = r_dot * np.sin(theta) + v_theta * r * np.cos(theta) + x, y, vx, vy = polar_to_cartesian(r, theta, r_dot, v_theta) # Set state vector cartesian coordinates self.x = x @@ -32,6 +30,22 @@ def update_state(self, r, theta, r_dot, v_theta, mass): self.vx = vx self.vy = vy + def update_state_cartesian(self, x, y, vx, vy, m): + + self.x = x + self.y = y + self.vx = vx + self.vy = vy + self.mass = m + + r, theta, rdot, v_theta = cartesian_to_polar(x, y, vx, vy) + + # update polar coordinates + self.r = r + self.theta = theta + self.r_dot = rdot + self.v_theta = v_theta + def 
calc_Planar_OE(self, x_cb, y_cb, vx_cb, vy_cb, mu_cb): # determine coordinates relative to central body x_rel = self.x - x_cb @@ -101,3 +115,7 @@ def calc_Planar_OE(self, x_cb, y_cb, vx_cb, vy_cb, mu_cb): theta = 2 * np.pi - np.acos(dotp) return a, e, w, theta + + def get_cartesian_state(self): + + return self.x, self.y, self.vx, self.vy diff --git a/src/python/TwoBody_Orb2Orb_Transfer_Env.py b/src/python/TwoBody_Orb2Orb_Transfer_Env.py index 4f1260e..03242b3 100644 --- a/src/python/TwoBody_Orb2Orb_Transfer_Env.py +++ b/src/python/TwoBody_Orb2Orb_Transfer_Env.py @@ -4,7 +4,8 @@ from scipy.integrate import solve_ivp from Constants import Constants from Spacecraft import Spacecraft -from Propagation import spacecraft_EOM_radial_2D_EB +from Propagation import env_EOM_TBT_v2 +from StateVectorUtilities import polar_to_cartesian class TwoBody_Orb2Orb_Transfer_Env(gym.Env): @@ -26,16 +27,19 @@ def __init__(self): # list of environment parameters (Sun is the central body) self.arr_mu = np.array([Constants.MU_SUN]) # solar mu [km^3/s^2] - self.planet_radii = np.array([Constants.RADIUS_SUN_M/1000]) # solar radius [km] + self.planet_radii = np.array( + [Constants.RADIUS_SUN_M / 1000] + ) # solar radius [km] self.elapsed_t = 0.0 self.step_size = 3600.0 # define the action space - # The action space consists of two variables: + # The action space consists of three variables: # 1) a control throlle input (scaled from 0 to 1) - # 2) a thrust vector (0 to 2pi) - low_array_action = np.array([0.0, 0.0], dtype=np.float32) - high_array_action = np.array([1.0, 2 * np.pi], dtype=np.float32) + # 2) a thrust direction x vector component (ranges -1 to 1) + # 3) a thrust direction y vector component (ranges -1 to 1) + low_array_action = np.array([0.0, -1.0, -1.0], dtype=np.float32) + high_array_action = np.array([1.0, 1.0, 1.0], dtype=np.float32) self.action_space = gym.spaces.Box(low=low_array_action, high=high_array_action) def _get_info(self, ode_solution, delta_r): @@ -80,10 +84,11 @@ 
def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): vx_cb = 0.0 vy_cb = 0.0 + # convert to cartesian coordinates with random theta as input + x, y, vx, vy = polar_to_cartesian(r, theta, r_dot, v_theta) + # set the initial state of the environment - self._state = np.array( - [r, theta, r_dot, v_theta, mass, mu, sma_target], dtype=np.float32 - ) + self._state = np.array([x, y, vx, vy, mass, mu, sma_target], dtype=np.float32) # set the location of the central body self._arr_cb = np.array([x_cb, y_cb, vx_cb, vy_cb], dtype=np.float32) @@ -155,10 +160,10 @@ def _apply_dV_in_VNB_frame(self, dV, X_i, Y_i, VX_i, VY_i): def step(self, action): # unpack the state vector - r = self._state[0] - theta = self._state[1] - r_dot = self._state[2] - v_theta = self._state[3] + x = self._state[0] + y = self._state[1] + vx = self._state[2] + vy = self._state[3] mass = self._state[4] mu = self._state[5] sma_target = self._state[6] @@ -174,27 +179,27 @@ def step(self, action): # unpack the action vector u = action[0] # throttle control - beta = action[1] # spacecraft attitude + alpha_x = action[1] # spacecraft thrust x-direction + alpha_y = action[2] # spacecraft thrust y-direction # step the spacecraft forward t_span = (0.0, self.step_size) - y0 = np.array([r, theta, r_dot, v_theta, mass]) + y0 = np.array([x, y, vx, vy, mass]) params = np.array( [ self.arr_mu[0], - self.planet_radii[0], sc.max_thrust, sc.specific_impulse, + Constants.G0_KM, u, - beta, + alpha_x, + alpha_y, ], dtype=np.float32, ) # solve ODE - solution = solve_ivp( - spacecraft_EOM_radial_2D_EB, t_span, y0, method="RK45", args=(params,) - ) + solution = solve_ivp(env_EOM_TBT_v2, t_span, y0, method="RK45", args=(params,)) # extract the final state vector from ODE solution (last column in y) y_final = (solution.y[:, -1]).astype(np.float32) @@ -205,10 +210,9 @@ def step(self, action): # update the state and elapsed time self.elapsed_t = self.elapsed_t + self.step_size self._state = np.append(y_final, 
[mu, sma_target]) - # self._state[4] and self._state[5] are constant # update the spacecraft object - sc.update_state(*y_final) + sc.update_state_cartesian(*y_final) # update the environment spacecraft object self._spacecraft = sc diff --git a/src/scripts/train_alpha_network.py b/src/scripts/train_alpha_network.py index dd78452..2e33312 100644 --- a/src/scripts/train_alpha_network.py +++ b/src/scripts/train_alpha_network.py @@ -40,11 +40,11 @@ def train_u_network(): # parameters params = { "training_data_pts": 1000, # training data batch size - "training_epochs": 1000, # number of training epochs to run + "training_epochs": 100, # number of training epochs to run "patience": 200000, # Epochs without training loss improvement to stop training "learning_rate_i": 0.1, # Initial Parameter learning rate "learning_rate_f": 0.1, # Final Parameter learning rate - "plot_update": 1000, # Number of epochs before plot is updated + "plot_update": 100, # Number of epochs before plot is updated "report_update": 1, # Number of epochs between reporting training status "train_fraction": 0.8, # Fraction of data to use for training "eval_fraction": 0.2, # Fraction of data to use for eval @@ -65,14 +65,15 @@ def train_u_network(): # paths print("Current wd: " + os.getcwd()) dir_training_dir = ( - "..\\data\\training_ephems\\test_set_bang_bang\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) dir_plots = "..\\data\\plots\\" # path for storing plot data dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) path_nn = os.path.normpath(os.path.join(os.getcwd(), dir_nn)) - path_plot_nn_training = path_plots + "nn_training.jpg" + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = 
os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -186,7 +187,7 @@ def train_u_network(): ) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_u_network() diff --git a/src/scripts/train_neural_network.py b/src/scripts/train_neural_network.py index 7e0d5a9..7a01cad 100644 --- a/src/scripts/train_neural_network.py +++ b/src/scripts/train_neural_network.py @@ -14,7 +14,7 @@ from gymnasium.envs.registration import register from torch.utils.data import DataLoader from torch.optim.lr_scheduler import CosineAnnealingLR -from Neural_Net_Controller import NN_TBT_Controller +from Neural_Net_Controllers import NN_TBT_Controller from Training_Data_Generation import read_ephems_from_dir from Constants import Constants from Plotting_Utils import format_plots, plot_training_loss @@ -38,19 +38,19 @@ def train_neural_network(): # parameters - training_data_pts = 1000 # training data batch size - training_epochs = 100000 # number of training epochs to run - min_mse = 999999 # min mse init value - patience = 200000 # If the number of iterations since min is greater than this number - training ends - learning_rate_i = 0.1 # Initial Parameter learning rate - learning_rate_f = 0.1 # Final Parameter learning rate - plot_update = training_epochs # Number of epochs before plot is updated - report_update = 1000 # Number of epochs between reporting training status - train_fraction = 0.8 # Fraction of data to use for training - eval_fraction = 0.2 # Fraction of data to use for eval - annealing_tmax = 1000 - params = { + "training_data_pts": 1000, # training data batch size + "training_epochs": 1000, # number of training epochs to run + "patience": 200000, # Epochs without training loss improvement to stop training + "learning_rate_i": 0.4, # Initial Parameter learning rate + "learning_rate_f": 0.4, # Final Parameter learning rate + "plot_update": 1000, # Number of 
epochs before plot is updated + "report_update": 10, # Number of epochs between reporting training status + "train_fraction": 0.8, # Fraction of data to use for training + "eval_fraction": 0.2, # Fraction of data to use for eval + "annealing_tmax": 1000, # Cosine annealing max iters + "loss": "MSE", # MSE, BCEWithLogitsLoss + "control_data_set": "all", # Control data sets to train (all, u, alpha) "mu": Constants.MU_SUN * 10 ** (9), # sun mu [m^3/s^2] "max_T": 1.33, # max spacecraft thrust [N] "ISP": 3872.0, # spacecraft specific impulse [s] @@ -64,14 +64,15 @@ def train_neural_network(): # paths dir_training_dir = ( - "\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) - dir_plots = "\\data\\plots\\" # path for storing plot data - dir_nn = "\\data\\neural_networks\\" # path for saving trained nn - path_training_dir = os.getcwd() + dir_training_dir - path_plots = os.getcwd() + dir_plots - path_nn = os.getcwd() + dir_nn - path_plot_nn_training = path_plots + "nn_training.jpg" + dir_plots = "..\\data\\plots\\" # path for storing plot data + dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn + path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) + path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) + path_nn = os.path.normpath(os.path.join(os.getcwd(), dir_nn)) + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -86,11 +87,11 @@ def train_neural_network(): # establish optimizer # optimizer = torch.optim.Adam(NN_TBT.parameters(), lr=learning_rate_i) - optimizer = torch.optim.SGD(NN_TBT.parameters(), lr=learning_rate_i) + optimizer = torch.optim.SGD(NN_TBT.parameters(), lr=params["learning_rate_i"]) # define a LR scheduler scheduler = CosineAnnealingLR( - 
optimizer, T_max=annealing_tmax, eta_min=learning_rate_f + optimizer, T_max=params["annealing_tmax"], eta_min=params["learning_rate_f"] ) # read ephemeris files @@ -102,9 +103,7 @@ def train_neural_network(): print(str(num_ephems * set_ephems[0].num_vectors) + " training data points") print("Number of Neural Network Parameters: " + str(num_p)) - train_dataset, val_dataset = pre_process_training_data( - set_ephems, train_fraction, eval_fraction, params - ) + train_dataset, val_dataset = pre_process_training_data( set_ephems, params ) # Training # -------------------------------------------------------------------------------------------------------- @@ -122,10 +121,10 @@ def train_neural_network(): NN_TBT.train() # using torch loader object to load training and eval data - train_loader = DataLoader(train_dataset, batch_size=training_data_pts, shuffle=True) - val_loader = DataLoader(val_dataset, batch_size=training_data_pts, shuffle=False) + train_loader = DataLoader(train_dataset, batch_size=params["training_data_pts"], shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=params["training_data_pts"], shuffle=False) - while epoch <= training_epochs: + while epoch <= params["training_epochs"]: # perform training epoch NN_TBT, avg_train_loss = training_epoch( @@ -138,7 +137,7 @@ def train_neural_network(): i_at_min = epoch # exit condition - if epoch > patience + i_at_min: + if epoch > params["patience"] + i_at_min: print("Patience Criterion reached, exiting training") flag_exit = True break @@ -147,7 +146,7 @@ def train_neural_network(): break # eval NN - if epoch % plot_update == 0: + if epoch % params["plot_update"] == 0: params["flag_plot"] = True else: params["flag_plot"] = False @@ -161,23 +160,23 @@ def train_neural_network(): arr_loss_train.append(avg_train_loss) arr_loss.append(avg_loss_val) - if epoch % report_update == 0: + if epoch % params["report_update"] == 0: print( - f"Epoch [{epoch}/{training_epochs}], Training Loss: {avg_train_loss:.4e}, 
Eval loss: {avg_loss_val:.4e} Min loss: {min_mse:.4e} last min: {epoch - i_at_min} lr: {scheduler.get_last_lr()[0]:.4e}" + f"Epoch [{epoch}/{params['training_epochs']}], Training Loss: {avg_train_loss:.4e}, Eval loss: {avg_loss_val:.4e} Min loss: {min_mse:.4e} last min: {epoch - i_at_min} lr: {scheduler.get_last_lr()[0]:.4e}" ) - if epoch % plot_update == 0: + if epoch % params["plot_update"] == 0: plot_training_loss( - arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training + arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training, params ) epoch = epoch + 1 # final training plot update - plot_training_loss(arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training) + plot_training_loss(arr_epochs, arr_loss_train, arr_loss, path_plot_nn_training, params) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_neural_network() diff --git a/src/scripts/train_u_network.py b/src/scripts/train_u_network.py index 3ce14d1..2ecbf6c 100644 --- a/src/scripts/train_u_network.py +++ b/src/scripts/train_u_network.py @@ -63,15 +63,17 @@ def train_u_network(): # paths + print("Current wd: " + os.getcwd()) dir_training_dir = ( - "\\data\\training_ephems\\test_set_bang_bang\\" # path to training data + "..\\data\\training_ephems\\test_set_bang_bang_subset\\" # path to training data ) - dir_plots = "\\data\\plots\\" # path for storing plot data - dir_nn = "\\data\\neural_networks\\" # path for saving trained nn - path_training_dir = os.getcwd() + dir_training_dir - path_plots = os.getcwd() + dir_plots - path_nn = os.getcwd() + dir_nn - path_plot_nn_training = path_plots + "nn_training.jpg" + dir_plots = "..\\data\\plots\\" # path for storing plot data + dir_nn = "..\\data\\neural_networks\\" # path for saving trained nn + path_training_dir = os.path.normpath(os.path.join(os.getcwd(), dir_training_dir)) + path_plots = os.path.normpath(os.path.join(os.getcwd(), dir_plots)) + path_nn = 
os.path.normpath(os.path.join(os.getcwd(), dir_nn)) + path_nn = os.path.normpath(os.path.join(path_nn, "nn_controller_weights.pth")) + path_plot_nn_training = os.path.join(path_plots,"nn_training.jpg") # plotting structure init arr_epochs = [] @@ -186,7 +188,7 @@ def train_u_network(): ) # save NN to file - torch.save(NN_TBT.state_dict(), path_nn + "nn_controller_weights.pth") + torch.save(NN_TBT.state_dict(), path_nn ) train_u_network() diff --git a/src/tests/TBO_Transfer_Env_Test.py b/src/tests/TBO_Transfer_Env_Test.py index 243e276..403d049 100644 --- a/src/tests/TBO_Transfer_Env_Test.py +++ b/src/tests/TBO_Transfer_Env_Test.py @@ -1,6 +1,5 @@ import numpy as np import gymnasium as gym -import matplotlib.pyplot as plot import sys import os @@ -8,7 +7,9 @@ from gymnasium.envs.registration import register # Adding python src code directory -sys.path.append(os.path.abspath("../python")) +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) from Ephemeris import Ephemeris @@ -25,7 +26,7 @@ env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") -steps_per_traj = 365 * 24 * 3 +steps_per_traj = 86400 * 365 * 1.1 / 3600 num_traj = 1 @@ -35,21 +36,20 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): arr_reward_totals = np.array([]) total_steps_in_env = 0 - for count_traj in range(0, num_traj): + for count_traj in range(0, num_trajectories): # reset the environment steps = 0 r_tot = 0.0 eph = Ephemeris() - observation, info = env.reset() + observation, info = env.reset(seed=42) while steps < steps_per_traj: - # Sample randomly from the action space. Since the action is a delta-V - # magnitude in km/s, and the action space is unbounded (-inf to inf) the - # test maneuver that is returned will be sampled from a Gaussian normal - # distribution with a mean of 0 and a standard deviation of 1. We - # devide by 1000 in this test case to give relatively small maneuvers. 
+ # Arbitrary test action action = env.action_space.sample() + action[0] = 1.0 + action[1] = -1.0 + np.tanh(steps / steps_per_traj) + action[2] = -1.0 + np.tanh(2 * steps / steps_per_traj) observation, reward, terminated, truncated, info = env.step(action) @@ -60,13 +60,19 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): if terminated: break - eph.add_polar_data( + if truncated: + break + + eph.add_data( elapsed_time, observation[0], observation[1], observation[2], observation[3], observation[4], + action[1], + action[2], + action[0], ) # print( elapsed_time, a, e, reward ) @@ -89,20 +95,9 @@ def test_runnable_env(env, num_trajectories, num_steps_per_traj): if count_traj == num_traj - 1: print("Plotting last trajectory...") - fig = eph.plot_xy(info["planet_radii"]) - plot.show(fig) - - fig_reward, ax = plot.subplots(figsize=(6, 6)) - - ax.plot(arr_episodes, arr_reward_totals, label="Total Reward") - - # Customize the figure - ax.set_title("Total Reward Per Episode") - ax.set_xlabel("Episode Count") - ax.set_ylabel("Total R") - ax.legend() - ax.grid(False) - plot.show(fig_reward) + eph.plot_xy(info["planet_radii"]) + eph.plot_xy_ref_orbit(observation[6], "Earth Orbit") + eph.plot_all_ephemeris_data() print("Test successful") diff --git a/src/tests/test_env_step_no_action.py b/src/tests/test_env_step_no_action.py new file mode 100644 index 0000000..9d6a302 --- /dev/null +++ b/src/tests/test_env_step_no_action.py @@ -0,0 +1,120 @@ +import numpy as np +import gymnasium as gym +import sys +import os +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", 
+ entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log("Test Environment Step with No Action", test_log, True) + + observation_2, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # zero action + action = np.array([0.0, 0.0, 0.0]) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation_2, reward, terminated, truncated, info_2 = env.step(action) + + # update observation_2 + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = 
log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + dir_test = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_no_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(dir_test, "output_test_env_step_no_action_log.txt") + ) + path_test_truth = os.path.normpath( + os.path.join(dir_test, "truth_test_env_step_no_action_log.txt") + ) + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? 
", are_same) + + +test_env_step_no_action(env, seed_in) diff --git a/src/tests/test_env_step_with_action.py b/src/tests/test_env_step_with_action.py new file mode 100644 index 0000000..d6a55c5 --- /dev/null +++ b/src/tests/test_env_step_with_action.py @@ -0,0 +1,120 @@ +import numpy as np +import gymnasium as gym +import sys +import os +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", + entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log("Test Environment Step with Thrust Action", test_log, True) + + observation_2, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = 
log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # throttle is 1 and unit vector components are 1 and 1 (vector is normalized in "step") + action = np.array([1.0, 1.0, 1.0]) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation_2, reward, terminated, truncated, info_2 = env.step(action) + + # update observation_2 + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation_2[0]), test_log, True) + test_log = log("Y: " + str(observation_2[1]), test_log, True) + test_log = log("VX: " + str(observation_2[2]), test_log, True) + test_log = log("VY: " + str(observation_2[3]), test_log, True) + test_log = log("m: " + str(observation_2[4]), test_log, True) + test_log = log("mu: " + str(observation_2[5]), test_log, True) + test_log = log("sma_target: " + str(observation_2[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + dir_test = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_with_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(dir_test, "output_test_env_step_with_action_log.txt") + ) + path_test_truth = os.path.normpath( + 
os.path.join(dir_test, "truth_test_env_step_with_action_log.txt") + ) + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? ", are_same) + + +test_env_step_no_action(env, seed_in) diff --git a/src/tests/test_env_step_with_nn_action.py b/src/tests/test_env_step_with_nn_action.py new file mode 100644 index 0000000..6f7550b --- /dev/null +++ b/src/tests/test_env_step_with_nn_action.py @@ -0,0 +1,165 @@ +import gymnasium as gym +import sys +import os +import torch +import filecmp + +from gymnasium import envs +from gymnasium.envs.registration import register + +# Adding python src code directory +current_dir = os.path.dirname(__file__) +python_src_dir = os.path.abspath(os.path.join(current_dir, "..", "python")) +sys.path.append(python_src_dir) + +from NN_Utils import query_NN_at_state +from Constants import Constants +from Neural_Net_Controllers import NN_TBT_Controller + + +# register the environment if it isn't registered +if "TwoBody_Orb2Orb_Transfer_Env-v0" not in envs.registry.keys(): + register( + id="TwoBody_Orb2Orb_Transfer_Env-v0", + entry_point="TwoBody_Orb2Orb_Transfer_Env:TwoBody_Orb2Orb_Transfer_Env", + ) + + +# initialize the environment +env = gym.make("TwoBody_Orb2Orb_Transfer_Env-v0") +seed_in = 42 + + +def log(info, log, flag_report_to_console=False): + + log.append(info) + + if flag_report_to_console: + print(info) + + return log + + +def test_env_step_no_action(env, seed_in): + + test_log = [] + test_log = log( + "Test Environment Step with Neural Network Thrust Action", test_log, True + ) + + # paths + path_test_dir = os.path.normpath( + os.path.join(os.getcwd(), "data\\test_data\\test_env_step_with_nn_action\\") + ) + path_test_report = os.path.normpath( + os.path.join(path_test_dir, 
"output_test_env_step_with_nn_action_log.txt") + ) + path_test_truth = os.path.normpath( + os.path.join(path_test_dir, "truth_test_env_step_with_nn_action_log.txt") + ) + path_input_nn = os.path.normpath( + os.path.join( + path_test_dir, "nn_controller_weights_smoothed_full_10e3_epochs.pth" + ) + ) + + observation, info = env.reset(seed=seed_in) + test_log = log("Environment has been reset", test_log, True) + test_log = log("Seed: " + str(seed_in), test_log, True) + + # load neural network from file + nn_controller = NN_TBT_Controller() # instantiate NN object + nn_control_param_dict = torch.load( + path_input_nn + ) # load parameter dictionary from file + nn_controller.load_state_dict( + nn_control_param_dict + ) # load the state parameter dictionary + + test_log = log("Neural Net loaded from: " + str(path_input_nn), test_log, True) + + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation[0]), test_log, True) + test_log = log("Y: " + str(observation[1]), test_log, True) + test_log = log("VX: " + str(observation[2]), test_log, True) + test_log = log("VY: " + str(observation[3]), test_log, True) + test_log = log("m: " + str(observation[4]), test_log, True) + test_log = log("mu: " + str(observation[5]), test_log, True) + test_log = log("sma_target: " + str(observation[6]), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info: ", test_log, True) + for key in info.keys(): + test_log = log(str(key) + ": " + str(info[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # pack NN state + x = observation[0] + y = observation[1] + vx = observation[2] + vy = observation[3] + m = observation[4] + state = [x, y, vx, vy, m] + + # define normalization parameters + params = { + "mu": Constants.MU_SUN * 10 ** (9), # sun mu [m^3/s^2] + "max_T": 1.33, # max spacecraft thrust [N] + "ISP": 3872.0, # spacecraft specific impulse [s] + "TOF": 1.1 * 365.25 * 24 * 60 * 60, # assumed time of flight [s] + "l_star": 
149598023000, # characteristic length = Earth SMA [m] + "m_star": 3366.0, # characteristic mass = SC initial mass [kg] + "t_star": (149598023000**3 / (Constants.MU_SUN * 10 ** (9))) + ** 0.5, # characteristic time - derived + "g0": Constants.G0, # gravtational acceleration at Earth surface [m/s^2] + } + + # get action from NN + action = query_NN_at_state(nn_controller, state, params) + + test_log = log("Action", test_log, True) + test_log = log("u: " + str(action[0]), test_log, True) + test_log = log("alpha_x: " + str(action[1]), test_log, True) + test_log = log("alpha_y: " + str(action[2]), test_log, True) + test_log = log("\n", test_log, True) + + # step the environment + observation, reward, terminated, truncated, info_2 = env.step(action) + + # update observation + test_log = log("Observation: ", test_log, True) + test_log = log("X: " + str(observation[0]), test_log, True) + test_log = log("Y: " + str(observation[1]), test_log, True) + test_log = log("VX: " + str(observation[2]), test_log, True) + test_log = log("VY: " + str(observation[3]), test_log, True) + test_log = log("m: " + str(observation[4]), test_log, True) + test_log = log("mu: " + str(observation[5]), test_log, True) + test_log = log("sma_target: " + str(observation[6]), test_log, True) + test_log = log("\n", test_log, True) + + # Report the reward + test_log = log("reward: " + str(reward), test_log, True) + test_log = log("terminated: " + str(terminated), test_log, True) + test_log = log("truncated: " + str(truncated), test_log, True) + test_log = log("\n", test_log, True) + + test_log = log("Info post-step: ", test_log, True) + for key in info_2.keys(): + test_log = log(str(key) + ": " + str(info_2[key]), test_log, True) + + test_log = log("\n", test_log, True) + + # write the log to a text file + with open(path_test_report, "w", encoding="utf-8") as f: + for line in test_log: + f.write(line + "\n") + + # compare the two files + print("output log: ", path_test_report) + print("truth log: ", 
path_test_truth) + are_same = filecmp.cmp(path_test_report, path_test_truth, shallow=False) + print("Test passed? ", are_same) + + +test_env_step_no_action(env, seed_in)