diff --git a/es/separable_natural_es.py b/es/separable_natural_es.py
index a1bd3de..c31071b 100644
--- a/es/separable_natural_es.py
+++ b/es/separable_natural_es.py
@@ -1,13 +1,17 @@
 import multiprocessing as mp
 import numpy as np
+import torch
+
 from . import lib
 
+
 def optimize(func, mu, sigma,
              learning_rate_mu=None, learning_rate_sigma=None,
              population_size=None, max_iter=2000,
              fitness_shaping=True, mirrored_sampling=True, record_history=False,
              rng=None,
-             parallel_threads=None):
+             parallel_threads=None,
+             optimizer=torch.optim.SGD):
     """
     Evolution strategies using the natural gradient of multinormal search distributions in natural coordinates.
     Does not consider covariances between parameters.
@@ -37,12 +41,21 @@ def optimize(func, mu, sigma,
     history_pop = []
     history_fitness = []
 
+    # convert mu to torch Variable and construct optimizer; force
+    # torch to use double representation
+    mu_torch = torch.autograd.Variable(torch.DoubleTensor(mu.copy()), requires_grad=True)
+    optimizer_mu = optimizer([mu_torch], lr=learning_rate_mu)
+
     while True:
-        s = rng.normal(0, 1, size=(population_size, *np.shape(mu)))
-        z = mu + sigma * s
+
+        # use numpy representation for generating individuals
+        mu_numpy = mu_torch.detach().numpy()
+
+        s = rng.normal(0, 1, size=(population_size, *np.shape(mu_numpy)))
+        z = mu_numpy + sigma * s
 
         if mirrored_sampling:
-            z = np.vstack([z, mu - sigma * s])
+            z = np.vstack([z, mu_numpy - sigma * s])
             s = np.vstack([s, -s])
 
         if parallel_threads is None:
@@ -65,26 +78,31 @@ def optimize(func, mu, sigma,
         else:
             utility = fitness
 
-        # update parameter of search distribution via natural gradient descent in natural coordinates
-        mu += learning_rate_mu * sigma * np.dot(utility, s)
-        sigma *= np.exp(learning_rate_sigma / 2. * np.dot(utility, s ** 2 - 1))
-
         if record_history:
-            history_mu.append(mu.copy())
+            history_mu.append(mu_numpy.copy())
             history_sigma.append(sigma.copy())
             history_pop.append(z.copy())
             history_fitness.append(fitness.copy())
 
-        generation += 1
-
         # exit if max iterations reached
         if generation > max_iter or np.all(sigma < 1e-10):
             break
 
-    return {'mu': mu,
+        # update parameters of search distribution via natural
+        # gradient descent in natural coordinates
+
+        # set gradient and use optimizer to update mu
+        mu_torch.grad = torch.autograd.Variable(torch.DoubleTensor(-sigma * np.dot(utility, s)))
+        optimizer_mu.step()
+
+        # manually update sigma
+        sigma *= np.exp(learning_rate_sigma / 2. * np.dot(utility, s ** 2 - 1))
+
+        generation += 1
+
+    return {'mu': mu_numpy,
             'sigma': sigma,
             'history_mu': history_mu,
             'history_sigma': history_sigma,
             'history_fitness': history_fitness,
             'history_pop': history_pop}
-
\ No newline at end of file
diff --git a/test/test_snes.py b/test/test_snes.py
index c37de58..d08680f 100644
--- a/test/test_snes.py
+++ b/test/test_snes.py
@@ -1,6 +1,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import sys
+import torch
 
 sys.path.append('../')
 
@@ -25,7 +26,8 @@ def test_quadratic_1d():
     def f(x):
         return functions.f_1d(x, x0)
 
-    res = snes.optimize(f, np.array([mu]), np.array([sigma]), max_iter=MAX_ITER)
+    res = snes.optimize(f, np.array([mu]), np.array([sigma]),
+                        max_iter=MAX_ITER, rng=SEED)
 
     assert(abs(res['mu'] - x0) < TOLERANCE_1D), SEED
 
@@ -43,7 +45,8 @@ def test_quadratic_2d():
     def f(x):
         return functions.f_2d(x, x0, y0)
 
-    res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]), max_iter=MAX_ITER)
+    res = snes.optimize(f, np.array([mu_x, mu_y]),
+                        np.array([sigma_x, sigma_y]), max_iter=MAX_ITER, rng=SEED)
 
     assert(abs(res['mu'][0] - x0) < TOLERANCE_2D), SEED
     assert(abs(res['mu'][1] - y0) < TOLERANCE_2D), SEED
@@ -62,7 +65,9 @@ def test_quadratic_2d_non_isotropic():
     def f(x):
         return functions.f_2d_nonisotropic(x, x0, y0)
 
-    res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]), max_iter=MAX_ITER, record_history=True)
+    res = snes.optimize(f, np.array([mu_x, mu_y]),
+                        np.array([sigma_x, sigma_y]), max_iter=MAX_ITER,
+                        record_history=True, rng=SEED)
 
     assert(abs(res['mu'][0] - x0) < TOLERANCE_2D), SEED
     assert(abs(res['mu'][1] - y0) < TOLERANCE_2D), SEED
@@ -91,7 +96,69 @@ def f(x):
 
     res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]),
                         # learning_rate_mu=0.1, learning_rate_sigma=0.00025,
-                        max_iter=MAX_ITER_ROSENBROCK)
+                        max_iter=MAX_ITER_ROSENBROCK,
+                        rng=SEED)
 
     assert(abs(res['mu'][0] - theo_min[0]) < TOLERANCE_ROSENBROCK), SEED
     assert(abs(res['mu'][1] - theo_min[1]) < TOLERANCE_ROSENBROCK), SEED
+
+
+def test_ann():
+    np.random.seed(SEED)
+    torch.manual_seed(SEED)
+
+    criterion = torch.nn.MSELoss()
+    trials = 2
+    inner_iterations = 50
+    in_features = 2
+    hidden_units = 4
+    out_features = 1
+
+    class ANN(torch.nn.Module):
+
+        def __init__(self):
+            super().__init__()
+
+            self.fc1 = torch.nn.Linear(in_features, hidden_units)
+            self.fc2 = torch.nn.Linear(hidden_units, out_features)
+
+        def forward(self, x):
+            h = torch.nn.functional.tanh(self.fc1(x))
+            return self.fc2(h)
+
+        def set_parameters(self, z):
+            offset = 0
+            for m in self.children():
+
+                weight_size = m.in_features * m.out_features
+                m.weight.data = torch.Tensor(z[offset:offset + weight_size].reshape(m.out_features, m.in_features))
+                offset += weight_size
+
+                bias_size = m.out_features
+                m.bias.data = torch.Tensor(z[offset:offset + bias_size])
+                offset += bias_size
+
+    for _ in range(trials):
+
+        model_target = ANN()
+        model = ANN()
+
+        def f(z):
+
+            model.set_parameters(z)
+
+            loss = 0
+            for x in 2 * torch.rand(inner_iterations, in_features) - 1:
+                target = torch.autograd.Variable(model_target(x), requires_grad=False)
+                loss += criterion(model(x), target)
+
+            return -loss / inner_iterations
+
+        param_count = in_features * hidden_units + hidden_units + hidden_units * out_features + out_features
+        mu = np.random.randn(param_count)
+        sigma = np.ones(param_count)
+
+        res = snes.optimize(f, mu, sigma, record_history=True,
+                            max_iter=100, rng=SEED, optimizer=torch.optim.SGD)
+
+        assert(abs(np.mean(res['history_fitness'][-1])) < 0.1)
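
For reference, a minimal usage sketch of the new optimizer argument, not part of the patch above. It assumes the module is importable as es.separable_natural_es (the package layout shown in the diff), that rng accepts an integer seed as in the tests, and it uses torch.optim.Adam purely to illustrate passing a different torch optimizer class for the mu update; the toy objective below is illustrative only.

import numpy as np
import torch

from es import separable_natural_es as snes


# toy objective to maximize; its optimum is at the origin
def f(x):
    return -np.sum(x ** 2)


# mu is updated through the supplied torch optimizer class, while sigma
# is still updated by the manual natural-gradient rule inside optimize()
res = snes.optimize(f, np.array([1.0, -1.0]), np.array([1.0, 1.0]),
                    max_iter=500, rng=42, optimizer=torch.optim.Adam)
print(res['mu'])  # expected to move toward [0, 0]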