44 changes: 31 additions & 13 deletions es/separable_natural_es.py
@@ -1,13 +1,17 @@
import multiprocessing as mp
import numpy as np
import torch

from . import lib


def optimize(func, mu, sigma,
learning_rate_mu=None, learning_rate_sigma=None, population_size=None,
max_iter=2000,
fitness_shaping=True, mirrored_sampling=True, record_history=False,
rng=None,
parallel_threads=None):
parallel_threads=None,
optimizer=torch.optim.SGD):
"""
Evolution strategies using the natural gradient of multinormal search distributions in natural coordinates.
Does not consider covariances between parameters.
@@ -37,12 +41,21 @@ def optimize(func, mu, sigma,
history_pop = []
history_fitness = []

# convert mu to torch Variable and construct optimizer; force
# torch to use double representation
mu_torch = torch.autograd.Variable(torch.DoubleTensor(mu.copy()), requires_grad=True)
optimizer_mu = optimizer([mu_torch], lr=learning_rate_mu)

while True:
s = rng.normal(0, 1, size=(population_size, *np.shape(mu)))
z = mu + sigma * s

# use numpy representation for generating individuals
mu_numpy = mu_torch.detach().numpy()

s = rng.normal(0, 1, size=(population_size, *np.shape(mu_numpy)))
z = mu_numpy + sigma * s

if mirrored_sampling:
z = np.vstack([z, mu - sigma * s])
z = np.vstack([z, mu_numpy - sigma * s])
s = np.vstack([s, -s])

if parallel_threads is None:
@@ -65,26 +78,31 @@ def optimize(func, mu, sigma,
else:
utility = fitness

# update parameter of search distribution via natural gradient descent in natural coordinates
mu += learning_rate_mu * sigma * np.dot(utility, s)
sigma *= np.exp(learning_rate_sigma / 2. * np.dot(utility, s ** 2 - 1))

if record_history:
history_mu.append(mu.copy())
history_mu.append(mu_numpy.copy())
history_sigma.append(sigma.copy())
history_pop.append(z.copy())
history_fitness.append(fitness.copy())

generation += 1

# exit if max iterations reached
if generation > max_iter or np.all(sigma < 1e-10):
break

return {'mu': mu,
# update parameters of search distribution via natural
# gradient descent in natural coordinates

# set gradient and use optimizer to update mu
mu_torch.grad = torch.autograd.Variable(torch.DoubleTensor(-sigma * np.dot(utility, s)))
optimizer_mu.step()

# manually update sigma
sigma *= np.exp(learning_rate_sigma / 2. * np.dot(utility, s ** 2 - 1))
Contributor (review comment on the sigma update):
Would it make sense here to update log(sigma) with the pytorch-optimizer? Or is that a bad idea due to numerical stability issues?
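A minimal sketch of what that suggestion might look like, not part of this PR: let a torch optimizer drive an update on log(sigma), so that sigma = exp(log_sigma) stays positive by construction. The `learning_rate_sigma`, `sigma`, `s`, and `utility` values below are illustrative placeholders; inside `optimize()` they come from the sampled population.

```python
import numpy as np
import torch

# illustrative placeholders for quantities optimize() already has in scope
learning_rate_sigma = 0.1
sigma = np.ones(3)
s = np.random.randn(6, 3)            # standard-normal samples
utility = np.linspace(0.5, -0.5, 6)  # shaped fitness values

# optimize log(sigma) so that sigma = exp(log_sigma) remains positive
log_sigma_torch = torch.autograd.Variable(
    torch.DoubleTensor(np.log(sigma)), requires_grad=True)
optimizer_sigma = torch.optim.SGD([log_sigma_torch], lr=learning_rate_sigma)

# the manual update sigma *= exp(lr_sigma / 2 * utility . (s**2 - 1)) is the
# same as one SGD step on log(sigma) with gradient -1/2 * utility . (s**2 - 1)
log_sigma_torch.grad = torch.autograd.Variable(
    torch.DoubleTensor(-0.5 * np.dot(utility, s ** 2 - 1)))
optimizer_sigma.step()

sigma = np.exp(log_sigma_torch.detach().numpy())
```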


generation += 1

return {'mu': mu_numpy,
'sigma': sigma,
'history_mu': history_mu,
'history_sigma': history_sigma,
'history_fitness': history_fitness,
'history_pop': history_pop}

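For reference, a hedged usage sketch of the new `optimizer` keyword introduced above. The import path `es.separable_natural_es`, the integer seed for `rng`, and the quadratic fitness function are assumptions made for illustration (mirroring the tests below), not part of the diff.

```python
import numpy as np
import torch

# assumed import path, based on the file location es/separable_natural_es.py
from es import separable_natural_es as snes


def fitness(x):
    # illustrative quadratic fitness with optimum at x = (2, 2); higher is better
    return -np.sum((x - 2.0) ** 2)


# pass any torch optimizer class; it is constructed internally as
# optimizer([mu_torch], lr=learning_rate_mu)
res = snes.optimize(fitness, np.zeros(2), np.ones(2),
                    max_iter=500, rng=42,
                    optimizer=torch.optim.Adam)
print(res['mu'], res['sigma'])
```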
75 changes: 71 additions & 4 deletions test/test_snes.py
@@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch

sys.path.append('../')

@@ -25,7 +26,8 @@ def test_quadratic_1d():
def f(x):
return functions.f_1d(x, x0)

res = snes.optimize(f, np.array([mu]), np.array([sigma]), max_iter=MAX_ITER)
res = snes.optimize(f, np.array([mu]), np.array([sigma]),
max_iter=MAX_ITER, rng=SEED)

assert(abs(res['mu'] - x0) < TOLERANCE_1D), SEED

@@ -43,7 +45,8 @@ def test_quadratic_2d():
def f(x):
return functions.f_2d(x, x0, y0)

res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]), max_iter=MAX_ITER)
res = snes.optimize(f, np.array([mu_x, mu_y]),
np.array([sigma_x, sigma_y]), max_iter=MAX_ITER, rng=SEED)

assert(abs(res['mu'][0] - x0) < TOLERANCE_2D), SEED
assert(abs(res['mu'][1] - y0) < TOLERANCE_2D), SEED
@@ -62,7 +65,9 @@ def test_quadratic_2d_non_isotropic():
def f(x):
return functions.f_2d_nonisotropic(x, x0, y0)

res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]), max_iter=MAX_ITER, record_history=True)
res = snes.optimize(f, np.array([mu_x, mu_y]),
np.array([sigma_x, sigma_y]), max_iter=MAX_ITER,
record_history=True, rng=SEED)

assert(abs(res['mu'][0] - x0) < TOLERANCE_2D), SEED
assert(abs(res['mu'][1] - y0) < TOLERANCE_2D), SEED
@@ -91,7 +96,69 @@ def f(x):

res = snes.optimize(f, np.array([mu_x, mu_y]), np.array([sigma_x, sigma_y]),
# learning_rate_mu=0.1, learning_rate_sigma=0.00025,
max_iter=MAX_ITER_ROSENBROCK)
max_iter=MAX_ITER_ROSENBROCK,
rng=SEED)

assert(abs(res['mu'][0] - theo_min[0]) < TOLERANCE_ROSENBROCK), SEED
assert(abs(res['mu'][1] - theo_min[1]) < TOLERANCE_ROSENBROCK), SEED


def test_ann():
np.random.seed(SEED)
torch.manual_seed(SEED)

criterion = torch.nn.MSELoss()
trials = 2
inner_iterations = 50
in_features = 2
hidden_units = 4
out_features = 1

class ANN(torch.nn.Module):

def __init__(self):
super().__init__()

self.fc1 = torch.nn.Linear(in_features, hidden_units)
self.fc2 = torch.nn.Linear(hidden_units, out_features)

def forward(self, x):
h = torch.nn.functional.tanh(self.fc1(x))
return self.fc2(h)

def set_parameters(self, z):
offset = 0
for m in self.children():

weight_size = m.in_features * m.out_features
m.weight.data = torch.Tensor(z[offset:offset + weight_size].reshape(m.out_features, m.in_features))
offset += weight_size

bias_size = m.out_features
m.bias.data = torch.Tensor(z[offset:offset + bias_size])
offset += bias_size

for _ in range(trials):

model_target = ANN()
model = ANN()

def f(z):

model.set_parameters(z)

loss = 0
for x in 2 * torch.rand(inner_iterations, in_features) - 1:
target = torch.autograd.Variable(model_target(x), requires_grad=False)
loss += criterion(model(x), target)

return -loss / inner_iterations

param_count = in_features * hidden_units + hidden_units + hidden_units * out_features + out_features
mu = np.random.randn(param_count)
sigma = np.ones(param_count)

res = snes.optimize(f, mu, sigma, record_history=True,
max_iter=100, rng=SEED, optimizer=torch.optim.SGD)

assert(abs(np.mean(res['history_fitness'][-1])) < 0.1)