diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..7586e7c --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,16 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..3b31283 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 0000000..922003b --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/climin/project.py b/climin/project.py index 5904c59..4fa5a9a 100644 --- a/climin/project.py +++ b/climin/project.py @@ -8,6 +8,51 @@ from mathadapt import sqrt +def project_to_simplex(v, scale=1.): + """ Project a vector into the probability simplex, return the result. + + The result is the closest non-negative vector whose elements sum to scale. + + Parameters + ---------- + v : np.array((n)) + The vector in R^n to project onto probability simplex + + scale : the sum of the elements in the result will be scale. + + If a different sum is desired for the entries, set scale accordingly. + + The orthogonal projection of v into the simplex is of form + + non_negative_part(v-adjustment) + + for some scalar adjustment, found iteratively below. + + The function adjustment->sum(non_negative_part(v-adjustment)) is decreasing and convex. + Then we can use Newton's iteration, and piecewise linearity gives + finite convergence. + This is faster than the O(n log(1/epsilon)) using binary search, + and there exist more complicated O(n) algorithms.
+ + """ + adjustment = np.min(v) - scale + sum_deviation = np.sum(non_negative_part(v - adjustment)) - scale + + while sum_deviation > 1e-8: + diff = v - adjustment + sum_deviation = non_negative_part(diff).sum() - scale + df = (1.0 * (diff > 0)).sum() + # debug: print((adjustment, sum_deviation, df)) + adjustment += sum_deviation / (df + 1e-6) + + return non_negative_part(v - adjustment) + + +def non_negative_part(v): + """ A copy of v with negative entries replaced by zero. """ + return (v + np.abs(v)) / 2 + + def max_length_columns(arr, max_length): """Project the columns of an array below a certain length. diff --git a/climin/rmsprop.py b/climin/rmsprop.py index 2b81e19..4d845bc 100644 --- a/climin/rmsprop.py +++ b/climin/rmsprop.py @@ -125,15 +125,16 @@ def __init__(self, wrt, fprime, steprate, decay=0.9, momentum=0, super(RmsProp, self).__init__(wrt, args=args) self.fprime = fprime - self._steprate = steprate self.decay = decay self.momentum = momentum self.step_adapt = step_adapt self.step_rate_min = step_rate_min self.step_rate_max = step_rate_max + self.steprate = steprate @property def steprate(self): + """Step rate to use during optimization""" return self._steprate @steprate.setter @@ -145,15 +146,13 @@ def steprate(self, steprate): if self.step_adapt: self.step_rate *= ones_like(self.wrt) - def __iter__(self): + def reset(self): + """Resets the momentum and the current estimate of the gradient.""" self.moving_mean_squared = 1 self.step_m1 = 0 - self.step_rate = self._steprate - - # If we adapt step rates, we need one for each parameter. - if self.step_adapt: - self.step_rate *= ones_like(self.wrt) + def __iter__(self): + self.reset() for i, (args, kwargs) in enumerate(self.args): # We use Nesterov momentum: first, we make a step according to the # momentum and then we calculate the gradient.
diff --git a/test/test_project.py b/test/test_project.py new file mode 100644 index 0000000..c4b2623 --- /dev/null +++ b/test/test_project.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +import climin.project as pr +from numpy import array, allclose + +def test_simplex_projection(): + point = array([1., 2, 3]) + scale = 1.43 + distribution = pr.project_to_simplex(point, scale=scale) + assert allclose(distribution.sum(), scale, atol = 1e-13), 'projection failed to achieve requested sum of elements' + assert (distribution >= 0.).all(), 'projection produced negative elements' diff --git a/test/test_util.py b/test/test_util.py index 7ebbf6f..345874c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -26,6 +26,7 @@ def test_optimizer(): def test_optimizer_distribution(): try: import sklearn + from sklearn.grid_search import ParameterSampler # not available on sklearn < 0.14. except ImportError: raise SkipTest() rv = OptimizerDistribution(gd={'steprate': [.1, .2],