Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions .idea/scopes/scope_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions climin/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,51 @@
from mathadapt import sqrt


def project_to_simplex(v, scale=1.):
    """Project a vector onto the (scaled) probability simplex.

    The result is the closest (Euclidean) non-negative vector whose
    elements sum to ``scale``.

    Parameters
    ----------
    v : np.array((n))
        The vector in R^n to project onto the simplex.

    scale : float, optional
        The sum of the elements of the result; must be positive.
        Defaults to 1, i.e. the probability simplex.

    Returns
    -------
    np.array((n))
        The projection of ``v``; non-negative, summing to ``scale``
        up to the stopping tolerance of 1e-8.

    Notes
    -----
    The orthogonal projection has the form ``max(v - adjustment, 0)``
    for some scalar ``adjustment``.  The function
    ``adjustment -> sum(max(v - adjustment, 0))`` is decreasing, convex
    and piecewise linear, so Newton's iteration started from the left
    of the root converges monotonically and in finitely many steps.
    This is faster than the O(n log(1/epsilon)) binary search; more
    involved O(n) algorithms exist.
    """
    # Start left of the root: with this adjustment every entry of
    # v - adjustment is >= scale, so sum_deviation >= 0 initially.
    adjustment = np.min(v) - scale
    while True:
        diff = v - adjustment
        positive = diff > 0
        sum_deviation = diff[positive].sum() - scale
        # Check convergence *before* updating, so the returned vector is
        # guaranteed to satisfy the tolerance.
        if sum_deviation <= 1e-8:
            break
        # While sum_deviation > 0 (and scale > 0) at least one entry is
        # positive, so the Newton slope count below is never zero.
        adjustment += sum_deviation / positive.sum()
    # Equivalent to non_negative_part(v - adjustment).
    return np.maximum(v - adjustment, 0.)


def non_negative_part(v):
    """Return a copy of ``v`` with negative entries replaced by zero.

    Uses ``np.maximum`` instead of the algebraic ``(v + |v|) / 2``:
    it is clearer, does one pass, and cannot overflow for values near
    the floating-point maximum.  NOTE(review): for integer input
    ``np.maximum`` keeps the integer dtype, whereas the old true
    division promoted to float; callers in this file pass floats.
    """
    return np.maximum(v, 0)


def max_length_columns(arr, max_length):
"""Project the columns of an array below a certain length.

Expand Down
13 changes: 6 additions & 7 deletions climin/rmsprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,16 @@ def __init__(self, wrt, fprime, steprate, decay=0.9, momentum=0,
super(RmsProp, self).__init__(wrt, args=args)

self.fprime = fprime
self._steprate = steprate
self.decay = decay
self.momentum = momentum
self.step_adapt = step_adapt
self.step_rate_min = step_rate_min
self.step_rate_max = step_rate_max
self.steprate = steprate

@property
def steprate(self):
"""Step rate to use during optimization"""
return self._steprate

@steprate.setter
Expand All @@ -145,15 +146,13 @@ def steprate(self, steprate):
if self.step_adapt:
self.step_rate *= ones_like(self.wrt)

def __iter__(self):
def reset(self):
"""Resets the momentum and the current estimate of the gradient."""
self.moving_mean_squared = 1
self.step_m1 = 0
self.step_rate = self._steprate

# If we adapt step rates, we need one for each parameter.
if self.step_adapt:
self.step_rate *= ones_like(self.wrt)

def __iter__(self):
self.reset()
for i, (args, kwargs) in enumerate(self.args):
# We use Nesterov momentum: first, we make a step according to the
# momentum and then we calculate the gradient.
Expand Down
11 changes: 11 additions & 0 deletions test/test_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-

import climin.project as pr
from numpy import array, allclose

def test_simplex_projection():
    """The projection must be non-negative and sum to the requested scale."""
    point = array([1., 2, 3])
    scale = 1.43
    distribution = pr.project_to_simplex(point, scale=scale)
    # Tolerance matches the 1e-8 stopping criterion of project_to_simplex;
    # the previous 1e-13 was stricter than the algorithm guarantees.
    assert allclose(distribution.sum(), scale, atol=1e-8), 'projection failed to achieve requested sum of elements'
    assert (distribution >= 0.).all(), 'projection produced negative elements'
1 change: 1 addition & 0 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_optimizer():
def test_optimizer_distribution():
try:
import sklearn
from sklearn.grid_search import ParameterSampler # not available on sklearn < 0.14.
except ImportError:
raise SkipTest()
rv = OptimizerDistribution(gd={'steprate': [.1, .2],
Expand Down