sheim · nirmiger · Apr 17, 2024 · Apr 19, 2024 · Apr 26, 2024 · Apr 26, 2024
diff --git a/gym/envs/cartpole/cartpole_config.py b/gym/envs/cartpole/cartpole_config.py
@@ -74,6 +74,12 @@ class policy(FixedRobotCfgPPO.policy):
         hidden_dims = [num_units] * num_layers
         activation = "elu"
 
+        class exploration:
+            # Type of actor, can be "smooth", "colored" or "white"
+            type = "colored"
+            sample_freq = 8
+            beta = 1.0
+
         obs = [
             "cart_obs",
             "pole_trig_obs",

diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py
@@ -73,8 +73,14 @@ class actor:
         hidden_dims = [256, 256, 128]
         # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
         activation = "elu"
-        smooth_exploration = True
-        exploration_sample_freq = 8
+
+        class exploration:
+            # Type of actor, can be "smooth", "colored" or "white"
+            type = "colored"
+            sample_freq = 16
+            beta = 1
+            # For sweep
+            log_std_init = 0.0
 
         normalize_obs = True
         obs = [

diff --git a/gym/exploration_analysis/calculate_smoothness.py b/gym/exploration_analysis/calculate_smoothness.py
@@ -0,0 +1,97 @@
+import numpy as np
+
+# Change signal to 500 steps
+smooth_name = "mini_cheetah_ref_smooth_16"
+baseline_name = "mini_cheetah_ref"
+colored_name = "mini_cheetah_ref_colored_1"
+
+colored_data_dir = "./data_train/" + colored_name
+smooth_data_dir = "./data_train/" + smooth_name
+baseline_data_dir = "./data_train/" + baseline_name
+
+# load data
+smooth_pos_target = np.load(smooth_data_dir + "/dof_pos_target.npy")[0]
+baseline_pos_target = np.load(baseline_data_dir + "/dof_pos_target.npy")[0]
+smooth_terminated = np.load(smooth_data_dir + "/terminated.npy")[0]
+baseline_terminated = np.load(baseline_data_dir + "/terminated.npy")[0]
+colored_pos_target = np.load(colored_data_dir + "/dof_pos_target.npy")[0]
+colored_terminated = np.load(colored_data_dir + "/terminated.npy")[0]
+
+# collect squared step-to-step changes of the position targets
+smooth_squared_deltas = [[], [], []]
+colored_squared_deltas = [[], [], []]
+baseline_squared_deltas = [[], [], []]
+for it in range(0, baseline_pos_target.shape[0], 50):
+    # only use data that didn't terminate
+    if not np.any(smooth_terminated[it, :, 0]):
+        for idx in range(3):
+            squared_deltas = (
+                smooth_pos_target[it, 1:, idx] - smooth_pos_target[it, :-1, idx]
+            ) ** 2
+            smooth_squared_deltas[idx].append(squared_deltas)
+
+    if not np.any(baseline_terminated[it, :, 0]):
+        for idx in range(3):
+            squared_deltas = (
+                baseline_pos_target[it, 1:, idx] - baseline_pos_target[it, :-1, idx]
+            ) ** 2
+            baseline_squared_deltas[idx].append(squared_deltas)
+
+    if not np.any(colored_terminated[it, :, 0]):
+        for idx in range(3):
+            squared_deltas = (
+                colored_pos_target[it, 1:, idx] - colored_pos_target[it, :-1, idx]
+            ) ** 2
+            colored_squared_deltas[idx].append(squared_deltas)
+
+smooth_squared_deltas_array = np.array(smooth_squared_deltas)
+baseline_squared_deltas_array = np.array(baseline_squared_deltas)
+colored_squared_deltas_array = np.array(colored_squared_deltas)
+
+# Find the maximum value of each array
+max_smooth = np.max(smooth_squared_deltas_array)
+max_baseline = np.max(baseline_squared_deltas_array)
+max_colored = np.max(colored_squared_deltas_array)
+
+# Find the maximum value among the three arrays
+max_squared_value = max(max_smooth, max_baseline, max_colored)
+
+smooth_squared_deltas_scaled = np.divide(
+    smooth_squared_deltas_array[:, 0, :], max_squared_value
+)
+baseline_squared_deltas_scaled = np.divide(
+    baseline_squared_deltas_array[:, 0, :], max_squared_value
+)
+colored_squared_deltas_scaled = np.divide(
+    colored_squared_deltas_array[:, 0, :], max_squared_value
+)
+
+# Calculate the mean of each scaled array (first kept iteration, index 0)
+mean_smooth = np.mean(smooth_squared_deltas_scaled)
+mean_baseline = np.mean(baseline_squared_deltas_scaled)
+mean_colored = np.mean(colored_squared_deltas_scaled)
+
+# Print the mean of each scaled array
+print(f"The mean of the scaled smooth_squared_deltas array is {mean_smooth*100}")
+print(f"The mean of the scaled baseline_squared_deltas array is {mean_baseline*100}")
+print(f"The mean of the scaled colored_squared_deltas array is {mean_colored*100}")
+
+smooth_squared_deltas_scaled = np.divide(
+    smooth_squared_deltas_array[:, -1, :], max_squared_value
+)
+baseline_squared_deltas_scaled = np.divide(
+    baseline_squared_deltas_array[:, -1, :], max_squared_value
+)
+colored_squared_deltas_scaled = np.divide(
+    colored_squared_deltas_array[:, -1, :], max_squared_value
+)
+
+# Calculate the mean of each scaled array (last kept iteration, index -1)
+mean_smooth = np.mean(smooth_squared_deltas_scaled)
+mean_baseline = np.mean(baseline_squared_deltas_scaled)
+mean_colored = np.mean(colored_squared_deltas_scaled)
+
+# Print the mean of each scaled array
+print(f"The mean of the scaled smooth_squared_deltas array is {mean_smooth*100}")
+print(f"The mean of the scaled baseline_squared_deltas array is {mean_baseline*100}")
+print(f"The mean of the scaled colored_squared_deltas array is {mean_colored*100}")
diff --git a/gym/exploration_analysis/plot_ft.py b/gym/exploration_analysis/plot_ft.py
@@ -0,0 +1,106 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+# Change signal to 500 steps
+smooth_name = "mini_cheetah_ref_smooth_16"
+baseline_name = "mini_cheetah_ref"
+colored_name = "mini_cheetah_ref_colored_1"
+
+colored_data_dir = "./data_train/" + colored_name
+smooth_data_dir = "./data_train/" + smooth_name
+baseline_data_dir = "./data_train/" + baseline_name
+fig_dir = "./figures_train/"
+
+if not os.path.exists(fig_dir):
+    os.makedirs(fig_dir)
+
+# load data
+smooth_pos_target = np.load(smooth_data_dir + "/dof_pos_target.npy")[0]
+baseline_pos_target = np.load(baseline_data_dir + "/dof_pos_target.npy")[0]
+smooth_terminated = np.load(smooth_data_dir + "/terminated.npy")[0]
+baseline_terminated = np.load(baseline_data_dir + "/terminated.npy")[0]
+colored_pos_target = np.load(colored_data_dir + "/dof_pos_target.npy")[0]
+colored_terminated = np.load(colored_data_dir + "/terminated.npy")[0]
+
+# compute FFT averages
+smooth_ffts = [[], [], [], [], [], [], [], [], [], [], [], []]
+colored_ffts = [[], [], [], [], [], [], [], [], [], [], [], []]
+baseline_ffts = [[], [], [], [], [], [], [], [], [], [], [], []]
+for it in range(0, baseline_pos_target.shape[0], 50):
+    # only use data that didn't terminate
+    if not np.any(smooth_terminated[it, :, 0]):
+        for idx in range(12):
+            fft = np.fft.fft(smooth_pos_target[it, :, idx])
+            smooth_ffts[idx].append(fft[: len(fft) // 2])
+
+    if not np.any(baseline_terminated[it, :, 0]):
+        for idx in range(12):
+            fft = np.fft.fft(baseline_pos_target[it, :, idx])
+            baseline_ffts[idx].append(fft[: len(fft) // 2])
+
+    if not np.any(colored_terminated[it, :, 0]):
+        for idx in range(12):
+            fft = np.fft.fft(colored_pos_target[it, :, idx])
+            colored_ffts[idx].append(fft[: len(fft) // 2])
+
+print(f"Total smooth FFTS: {len(smooth_ffts[0])}")
+print(f"Total baseline FFTS: {len(baseline_ffts[0])}")
+print(f"Total colored FFTS: {len(colored_ffts[0])}")
+
+smooth_fft_means = [np.array(smooth_ffts[idx]).mean(axis=0) for idx in range(12)]
+baseline_fft_means = [np.array(baseline_ffts[idx]).mean(axis=0) for idx in range(12)]
+colored_fft_means = [np.array(colored_ffts[idx]).mean(axis=0) for idx in range(12)]
+
+
+def moving_average(a, n=3):  # n-point moving average; 1-D a gives len(a) - n + 1 values
+    ret = np.cumsum(a, dtype=float)  # running totals, accumulated as float
+    ret[n:] = ret[n:] - ret[:-n]  # totals n apart differ by exactly one window's sum
+    return ret[n - 1 :] / n  # skip the n - 1 incomplete leading windows, then normalize
+
+
+x_values = np.linspace(0, 50, 498)  # hard-coded: must equal the smoothed spectrum length
+# plot FFTs (NOTE(review): 498 presumably = 500 half-spectrum bins - 2 from n=3 — confirm)
+fig, axs = plt.subplots(1, 2, figsize=(10, 4))
+for idx in range(2):
+    colored_smooth_start = moving_average(
+        np.array(np.abs(colored_ffts))[:, 0, :].mean(axis=0)
+    )
+    baseline_smooth_start = moving_average(
+        np.array(np.abs(baseline_ffts))[:, 0, :].mean(axis=0)
+    )
+    sde_smooth_start = moving_average(
+        np.array(np.abs(smooth_ffts))[:, 0, :].mean(axis=0)
+    )
+    colored_smooth_end = moving_average(
+        np.array(np.abs(colored_ffts))[:, -1, :].mean(axis=0)
+    )
+    baseline_smooth_end = moving_average(
+        np.array(np.abs(baseline_ffts))[:, -1, :].mean(axis=0)
+    )
+    sde_smooth_end = moving_average(
+        np.array(np.abs(smooth_ffts))[:, -1, :].mean(axis=0)
+    )
+
+    if idx == 0:
+        axs[idx].plot(x_values, colored_smooth_start, label="Pink", color="blue")
+        axs[idx].plot(x_values, baseline_smooth_start, label="Baseline", color="green")
+        axs[idx].plot(x_values, sde_smooth_start, label="gSDE-16", color="red")
+        axs[idx].set_title("Fourier Transform at the Beginning of Training")
+        axs[idx].set_xlabel("Frequency [Hz]")
+        axs[idx].set_ylabel("Amplitude")
+        axs[idx].legend()
+        axs[idx].set_ylim([-1, 40])
+
+    else:
+        axs[idx].plot(x_values, colored_smooth_end, label="Pink", color="blue")
+        axs[idx].plot(x_values, baseline_smooth_end, label="Baseline", color="green")
+        axs[idx].plot(x_values, sde_smooth_end, label="gSDE-16", color="red")
+        axs[idx].set_title("Fourier Transform at the End of Training")
+        axs[idx].set_xlabel("Frequency [Hz]")
+        axs[idx].set_ylabel("Amplitude")
+        axs[idx].legend()
+        axs[idx].set_ylim([-1, 40])
+
+fig.tight_layout()
+fig.savefig(fig_dir + "/" + "fourier.png")
diff --git a/gym/smooth_exploration/plot_play.py → gym/exploration_analysis/plot_play.py b/gym/smooth_exploration/plot_play.py → gym/exploration_analysis/plot_play.py
diff --git a/gym/exploration_analysis/plot_power.py b/gym/exploration_analysis/plot_power.py
@@ -0,0 +1,77 @@
+import numpy as np
+import os
+
+smooth_name = "mini_cheetah_ref_smooth_16"
+baseline_name = "mini_cheetah_ref"
+colored_name = "mini_cheetah_ref_colored_1"
+
+colored_data_dir = "./data_train/" + colored_name
+smooth_data_dir = "./data_train/" + smooth_name
+baseline_data_dir = "./data_train/" + baseline_name
+fig_dir = "./figures_train/"
+
+if not os.path.exists(fig_dir):
+    os.makedirs(fig_dir)
+
+# load data
+smooth_dof_vel = np.load(smooth_data_dir + "/dof_vel.npy")[0]
+baseline_dof_vel = np.load(baseline_data_dir + "/dof_vel.npy")[0]
+smooth_terminated = np.load(smooth_data_dir + "/terminated.npy")[0]
+baseline_terminated = np.load(baseline_data_dir + "/terminated.npy")[0]
+colored_dof_vel = np.load(colored_data_dir + "/dof_vel.npy")[0]
+colored_terminated = np.load(colored_data_dir + "/terminated.npy")[0]
+smooth_torques = np.load(smooth_data_dir + "/torques.npy")[0]
+baseline_torques = np.load(baseline_data_dir + "/torques.npy")[0]
+colored_torques = np.load(colored_data_dir + "/torques.npy")[0]
+
+smooth_power = [[], [], [], [], [], [], [], [], [], [], [], []]
+colored_power = [[], [], [], [], [], [], [], [], [], [], [], []]
+baseline_power = [[], [], [], [], [], [], [], [], [], [], [], []]
+for it in range(0, smooth_dof_vel.shape[0], 50):
+    # only use data that didn't terminate
+    if not np.any(smooth_terminated[it, :, 0]):
+        for idx in range(12):
+            smooth_power[idx].append(
+                np.abs(
+                    np.multiply(smooth_dof_vel[it, :, idx], smooth_torques[it, :, idx])
+                )
+            )
+
+    if not np.any(baseline_terminated[it, :, 0]):
+        for idx in range(12):
+            baseline_power[idx].append(
+                np.abs(
+                    np.multiply(
+                        baseline_dof_vel[it, :, idx], baseline_torques[it, :, idx]
+                    )
+                )
+            )
+
+    if not np.any(colored_terminated[it, :, 0]):
+        for idx in range(12):
+            colored_power[idx].append(
+                np.abs(
+                    np.multiply(
+                        colored_dof_vel[it, :, idx], colored_torques[it, :, idx]
+                    )
+                )
+            )
+
+print(f"Total smooth: {len(smooth_power[0])}")
+print(f"Total baseline: {len(baseline_power[0])}")
+print(f"Total colored: {len(colored_power[0])}")
+
+power_values = [
+    np.array(smooth_power),
+    np.array(baseline_power),
+    np.array(colored_power),
+]
+
+# Calculate mean power at the beginning and end of training
+# (first and last kept iteration, averaged over all 12 indices and time steps)
+
+mean_power_beginning = [power[:, 0, :].mean() for power in power_values]
+mean_power_end = [power[:, -1, :].mean() for power in power_values]
+
+print(mean_power_beginning)
+print(mean_power_end)
diff --git a/gym/smooth_exploration/plot_train.py → gym/exploration_analysis/plot_train.py b/gym/smooth_exploration/plot_train.py → gym/exploration_analysis/plot_train.py
@@ -7,7 +7,7 @@
 SAMPLE_FREQ = 16
 STEPS = 1000
 
-name = "ref_sample_16_len_1000"
+name = "mini_cheetah_ref_colored_0.5"
 data_dir = "./data_train/" + name
 fig_dir = "./figures_train/" + name
 
@@ -29,16 +29,11 @@ def plot_fourier(data, it):
         ft = np.fft.fft(data[:, i])
         ft_half = ft[: len(ft) // 2]
         axs_ft[0].plot(np.abs(ft_half))
-        axs_ft[1].plot(np.angle(ft_half))
 
     axs_ft[0].set_title("FT Amplitude")
     axs_ft[0].set_xlabel("Frequency")
     axs_ft[0].set_ylabel("Amplitude")
     axs_ft[0].legend(["idx 0", "idx 1", "idx 2"])
-    axs_ft[1].set_title("FT Phase")
-    axs_ft[1].set_xlabel("Frequency")
-    axs_ft[1].set_ylabel("Phase")
-    axs_ft[1].legend(["idx 0", "idx 1", "idx 2"])
 
     fig_ft.savefig(fig_dir + "/dof_pos_target_FT_it_" + str(it) + ".png")