From c9ba96081025c13e818563b18e05049ed1779da7 Mon Sep 17 00:00:00 2001 From: Sebastian Urban Date: Tue, 2 Jul 2013 16:42:16 +0200 Subject: [PATCH 01/14] draw_mini_indices --- climin.pyproj | 66 +++++++++++++++++++++++++++++++++++++++++++++++--- climin/util.py | 21 ++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/climin.pyproj b/climin.pyproj index 1764fc1..09fb369 100644 --- a/climin.pyproj +++ b/climin.pyproj @@ -3,9 +3,10 @@ Debug 2.0 - 0ec7b37a-45af-4db0-a76e-a0e0ec18ac78 + {1e8caaee-4a8c-48ad-aa51-f7f049f13552} . - climin.py + + . @@ -22,7 +23,66 @@ false - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/climin/util.py b/climin/util.py index 593bc49..c3d8769 100644 --- a/climin/util.py +++ b/climin/util.py @@ -40,3 +40,24 @@ def draw_mini_slices(n_samples, batch_size, with_replacement=False): random.shuffle(idxs) for i in idxs: yield slices[i] + +def draw_mini_indices(n_samples, batch_size): + assert n_samples > batch_size + idxs = range(n_samples) + random.shuffle(idxs) + pos = 0 + + while True: + while pos + batch_size <= n_samples: + yield idxs[pos:pos + batch_size] + pos += batch_size + + batch = idxs[pos:] + needed = batch_size - len(batch) + random.shuffle(idxs) + batch += idxs[0:needed] + yield batch + pos = needed + + + From 2f3ed3efd431ddfcb45a8a1300f1c51f6e706539 Mon Sep 17 00:00:00 2001 From: Sebastian Urban Date: Mon, 22 Jul 2013 19:54:17 +0200 Subject: [PATCH 02/14] project file --- .gitignore | 3 +++ climin.pyproj | 15 +++++++++------ climin.sln | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 climin.sln diff --git a/.gitignore b/.gitignore index 7fdea58..2eb87d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ *.pyc *.egg-info +*.suo +docs/build/ + diff --git a/climin.pyproj b/climin.pyproj index 09fb369..ac710fa 100644 --- a/climin.pyproj +++ b/climin.pyproj @@ -31,26 +31,29 @@ - - - + + Code + - - + + Code + + + Code + - diff --git a/climin.sln b/climin.sln new file mode 100644 index 0000000..95fd8e0 --- /dev/null +++ b/climin.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 +Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "climin", "climin.pyproj", "{1E8CAAEE-4A8C-48AD-AA51-F7F049F13552}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1E8CAAEE-4A8C-48AD-AA51-F7F049F13552}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1E8CAAEE-4A8C-48AD-AA51-F7F049F13552}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1E8CAAEE-4A8C-48AD-AA51-F7F049F13552}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1E8CAAEE-4A8C-48AD-AA51-F7F049F13552}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal From c6843138bc9d32608e70a386cea133c8f5346ff5 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 11:07:48 +0200 Subject: [PATCH 03/14] Add a projection to the probability simplex, and a test for it. --- climin/project.py | 18 ++++++++++++++++++ test/test_project.py | 11 +++++++++++ 2 files changed, 29 insertions(+) create mode 100644 test/test_project.py diff --git a/climin/project.py b/climin/project.py index 5904c59..fbfa01b 100644 --- a/climin/project.py +++ b/climin/project.py @@ -7,6 +7,24 @@ from mathadapt import sqrt +def project_to_simplex(v, scale=1.): + """ The orthogonal projection of v into the simplex is of form sum(non_negative_part(v-a)) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ + a = min(v) - scale + f = sum(non_negative_part(v - a)) - scale + + while f > 1e-8: + diff = v - a + f = non_negative_part(diff).sum() - scale + df = (1.0 * (diff > 0)).sum() + #print((a, f, df)) + a = a + f / (df + 1e-6) + + return non_negative_part(v - a) + +def non_negative_part(v): + """ A copy of v with negative entries replaced by zero. """ + return (v + abs(v)) / 2 def max_length_columns(arr, max_length): """Project the columns of an array below a certain length. diff --git a/test/test_project.py b/test/test_project.py new file mode 100644 index 0000000..c4b2623 --- /dev/null +++ b/test/test_project.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +import climin.project as pr +from numpy import array, allclose + +def test_simplex_projection(): + point = array([1., 2, 3]) + scale = 1.43 + distribution = pr.project_to_simplex(point, scale=scale) + assert allclose(distribution.sum(), scale, atol = 1e-13), 'projection failed to achieve requested sum of elements' + assert (distribution >= 0.).all(), 'projection produced negative elements' From 116b2afa295f6a5b7023f8f4d5b3323a6b876220 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 12:27:09 +0200 Subject: [PATCH 04/14] Improve the docstring for new projection to simplex. --- climin/project.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/climin/project.py b/climin/project.py index fbfa01b..e609930 100644 --- a/climin/project.py +++ b/climin/project.py @@ -8,7 +8,10 @@ from mathadapt import sqrt def project_to_simplex(v, scale=1.): - """ The orthogonal projection of v into the simplex is of form sum(non_negative_part(v-a)) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ Project v into the probability simplex, return the result. If a different sum is desired for the entries, use scale. + + The orthogonal projection of v into the simplex is of form non_negative_part(v-a) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ a = min(v) - scale f = sum(non_negative_part(v - a)) - scale From fc3381b6180dfe3962a4af869afe76fe760029cc Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 15:22:27 +0200 Subject: [PATCH 05/14] A test that fails for sklearn < 0.14 now skipped instead. --- test/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_util.py b/test/test_util.py index 7ebbf6f..345874c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -26,6 +26,7 @@ def test_optimizer(): def test_optimizer_distribution(): try: import sklearn + from sklearn.grid_search import ParameterSampler # not available on sklearn < 0.14. except ImportError: raise SkipTest() rv = OptimizerDistribution(gd={'steprate': [.1, .2], From 2163b3e11c4373f8f7cd639d1233bff50336917a Mon Sep 17 00:00:00 2001 From: Sebastian Urban Date: Sun, 29 Dec 2013 14:00:44 +0100 Subject: [PATCH 06/14] RMSprop: function to reset internal state of optimizer --- climin/rmsprop.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/climin/rmsprop.py b/climin/rmsprop.py index 2b81e19..4d845bc 100644 --- a/climin/rmsprop.py +++ b/climin/rmsprop.py @@ -125,15 +125,16 @@ def __init__(self, wrt, fprime, steprate, decay=0.9, momentum=0, super(RmsProp, self).__init__(wrt, args=args) self.fprime = fprime - self._steprate = steprate self.decay = decay self.momentum = momentum self.step_adapt = step_adapt self.step_rate_min = step_rate_min self.step_rate_max = step_rate_max + self.steprate = steprate @property def steprate(self): + """Step rate to use during optimization""" return self._steprate @steprate.setter @@ -145,15 +146,13 @@ def steprate(self, steprate): if self.step_adapt: self.step_rate *= ones_like(self.wrt) - def __iter__(self): + def reset(self): + """Resets the momentum and the current estimate of the gradient.""" self.moving_mean_squared = 1 self.step_m1 = 0 - self.step_rate = self._steprate - - # If we adapt step rates, we need one for each parameter. - if self.step_adapt: - self.step_rate *= ones_like(self.wrt) + def __iter__(self): + self.reset() for i, (args, kwargs) in enumerate(self.args): # We use Nesterov momentum: first, we make a step according to the # momentum and then we calculate the gradient. From 5e4cd8d53731aee15638e4b4d4e8d23e726a9602 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 18 Jan 2014 20:06:02 +0200 Subject: [PATCH 07/14] PEP8 fixes on projection to simplex. --- .idea/scopes/scope_settings.xml | 5 +++++ climin/project.py | 20 +++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 .idea/scopes/scope_settings.xml diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 0000000..922003b --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/climin/project.py b/climin/project.py index e609930..a100804 100644 --- a/climin/project.py +++ b/climin/project.py @@ -7,10 +7,22 @@ from mathadapt import sqrt + def project_to_simplex(v, scale=1.): - """ Project v into the probability simplex, return the result. If a different sum is desired for the entries, use scale. + """ Project v into the probability simplex, return the result. + If a different sum is desired for the entries, use scale. + + The orthogonal projection of v into the simplex is of form + + non_negative_part(v-a) - The orthogonal projection of v into the simplex is of form non_negative_part(v-a) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + for some a. + + The function a->sum(non_negative_part(v-a)) is decreasing and convex. + Then we can use a newton's iteration, and piecewise linearity give + finite convergence. + This is faster than the O(n log(n)) using binary search, + and there exist more complicated O(n) algorithms. """ a = min(v) - scale @@ -21,14 +33,16 @@ def project_to_simplex(v, scale=1.): f = non_negative_part(diff).sum() - scale df = (1.0 * (diff > 0)).sum() #print((a, f, df)) - a = a + f / (df + 1e-6) + a += f / (df + 1e-6) return non_negative_part(v - a) + def non_negative_part(v): """ A copy of v with negative entries replaced by zero. """ return (v + abs(v)) / 2 + def max_length_columns(arr, max_length): """Project the columns of an array below a certain length. From 401e7d6fe6af8a0cab95ac84e614f3133ee2f6b3 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sun, 19 Jan 2014 10:53:09 +0200 Subject: [PATCH 08/14] Documentation fixes on projection to simplex. --- .idea/inspectionProfiles/Project_Default.xml | 16 ++++++++++++++++ .idea/inspectionProfiles/profiles_settings.xml | 7 +++++++ climin/project.py | 17 ++++++++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..7586e7c --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,16 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..3b31283 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/climin/project.py b/climin/project.py index a100804..74f1cf3 100644 --- a/climin/project.py +++ b/climin/project.py @@ -4,19 +4,30 @@ import numpy as np +from numpy import sum, min from mathadapt import sqrt def project_to_simplex(v, scale=1.): - """ Project v into the probability simplex, return the result. - If a different sum is desired for the entries, use scale. + """ Project a vector into the probability simplex, return the result. + + The result is the closest non-negative vector whose elements sum to scale. + + Parameters + ---------- + v : np.array((n)) + The vector in R^n to project onto probability simplex + + scale : the sum of the elements in the result will be scale. + + If a different sum is desired for the entries, set scale accordingly. The orthogonal projection of v into the simplex is of form non_negative_part(v-a) - for some a. + for some a (see) The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give From a7343aee0a5511ed066eb80ed8b94e6428f075af Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 11:07:48 +0200 Subject: [PATCH 09/14] Add a projection to the probability simplex, and a test for it. --- climin/project.py | 18 ++++++++++++++++++ test/test_project.py | 11 +++++++++++ 2 files changed, 29 insertions(+) create mode 100644 test/test_project.py diff --git a/climin/project.py b/climin/project.py index 5904c59..fbfa01b 100644 --- a/climin/project.py +++ b/climin/project.py @@ -7,6 +7,24 @@ from mathadapt import sqrt +def project_to_simplex(v, scale=1.): + """ The orthogonal projection of v into the simplex is of form sum(non_negative_part(v-a)) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ + a = min(v) - scale + f = sum(non_negative_part(v - a)) - scale + + while f > 1e-8: + diff = v - a + f = non_negative_part(diff).sum() - scale + df = (1.0 * (diff > 0)).sum() + #print((a, f, df)) + a = a + f / (df + 1e-6) + + return non_negative_part(v - a) + +def non_negative_part(v): + """ A copy of v with negative entries replaced by zero. """ + return (v + abs(v)) / 2 def max_length_columns(arr, max_length): """Project the columns of an array below a certain length. diff --git a/test/test_project.py b/test/test_project.py new file mode 100644 index 0000000..c4b2623 --- /dev/null +++ b/test/test_project.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +import climin.project as pr +from numpy import array, allclose + +def test_simplex_projection(): + point = array([1., 2, 3]) + scale = 1.43 + distribution = pr.project_to_simplex(point, scale=scale) + assert allclose(distribution.sum(), scale, atol = 1e-13), 'projection failed to achieve requested sum of elements' + assert (distribution >= 0.).all(), 'projection produced negative elements' From 98543c1ed208d93fc61a09b032ceb34c7473047b Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 12:27:09 +0200 Subject: [PATCH 10/14] Improve the docstring for new projection to simplex. --- climin/project.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/climin/project.py b/climin/project.py index fbfa01b..e609930 100644 --- a/climin/project.py +++ b/climin/project.py @@ -8,7 +8,10 @@ from mathadapt import sqrt def project_to_simplex(v, scale=1.): - """ The orthogonal projection of v into the simplex is of form sum(non_negative_part(v-a)) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ Project v into the probability simplex, return the result. If a different sum is desired for the entries, use scale. + + The orthogonal projection of v into the simplex is of form non_negative_part(v-a) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + """ a = min(v) - scale f = sum(non_negative_part(v - a)) - scale From 3c02be923a12be6824f4c0681db08ba428924264 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 7 Dec 2013 15:22:27 +0200 Subject: [PATCH 11/14] A test that fails for sklearn < 0.14 now skipped instead. --- test/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_util.py b/test/test_util.py index 7ebbf6f..345874c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -26,6 +26,7 @@ def test_optimizer(): def test_optimizer_distribution(): try: import sklearn + from sklearn.grid_search import ParameterSampler # not available on sklearn < 0.14. except ImportError: raise SkipTest() rv = OptimizerDistribution(gd={'steprate': [.1, .2], From 4d9d09283291fb0b308f34fb49ea49aa681d2c22 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sat, 18 Jan 2014 20:06:02 +0200 Subject: [PATCH 12/14] PEP8 fixes on projection to simplex. --- .idea/scopes/scope_settings.xml | 5 +++++ climin/project.py | 20 +++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 .idea/scopes/scope_settings.xml diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 0000000..922003b --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/climin/project.py b/climin/project.py index e609930..a100804 100644 --- a/climin/project.py +++ b/climin/project.py @@ -7,10 +7,22 @@ from mathadapt import sqrt + def project_to_simplex(v, scale=1.): - """ Project v into the probability simplex, return the result. If a different sum is desired for the entries, use scale. + """ Project v into the probability simplex, return the result. + If a different sum is desired for the entries, use scale. + + The orthogonal projection of v into the simplex is of form + + non_negative_part(v-a) - The orthogonal projection of v into the simplex is of form non_negative_part(v-a) for some a. The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give finite convergence. This is faster than the O(n log(n)) using binary search, and there exist more complicated O(n) algorithms. + for some a. + + The function a->sum(non_negative_part(v-a)) is decreasing and convex. + Then we can use a newton's iteration, and piecewise linearity give + finite convergence. + This is faster than the O(n log(n)) using binary search, + and there exist more complicated O(n) algorithms. """ a = min(v) - scale @@ -21,14 +33,16 @@ def project_to_simplex(v, scale=1.): f = non_negative_part(diff).sum() - scale df = (1.0 * (diff > 0)).sum() #print((a, f, df)) - a = a + f / (df + 1e-6) + a += f / (df + 1e-6) return non_negative_part(v - a) + def non_negative_part(v): """ A copy of v with negative entries replaced by zero. """ return (v + abs(v)) / 2 + def max_length_columns(arr, max_length): """Project the columns of an array below a certain length. From 10752deb29034406b8174a97739de215ea9e7d00 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sun, 19 Jan 2014 10:53:09 +0200 Subject: [PATCH 13/14] Documentation fixes on projection to simplex. --- .idea/inspectionProfiles/Project_Default.xml | 16 ++++++++++++++++ .idea/inspectionProfiles/profiles_settings.xml | 7 +++++++ climin/project.py | 17 ++++++++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..7586e7c --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,16 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..3b31283 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/climin/project.py b/climin/project.py index a100804..74f1cf3 100644 --- a/climin/project.py +++ b/climin/project.py @@ -4,19 +4,30 @@ import numpy as np +from numpy import sum, min from mathadapt import sqrt def project_to_simplex(v, scale=1.): - """ Project v into the probability simplex, return the result. - If a different sum is desired for the entries, use scale. + """ Project a vector into the probability simplex, return the result. + + The result is the closest non-negative vector whose elements sum to scale. + + Parameters + ---------- + v : np.array((n)) + The vector in R^n to project onto probability simplex + + scale : the sum of the elements in the result will be scale. + + If a different sum is desired for the entries, set scale accordingly. The orthogonal projection of v into the simplex is of form non_negative_part(v-a) - for some a. + for some a (see) The function a->sum(non_negative_part(v-a)) is decreasing and convex. Then we can use a newton's iteration, and piecewise linearity give From a7788253219cfbeaa1e5a99465e63c08d60d0808 Mon Sep 17 00:00:00 2001 From: Daniel Vainsencher Date: Sun, 19 Jan 2014 11:45:53 +0200 Subject: [PATCH 14/14] Use numpy's sum, min explicitly. Some renames for clarity. Typos in documentation. --- climin/project.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/climin/project.py b/climin/project.py index 74f1cf3..4fa5a9a 100644 --- a/climin/project.py +++ b/climin/project.py @@ -4,7 +4,6 @@ import numpy as np -from numpy import sum, min from mathadapt import sqrt @@ -25,33 +24,33 @@ def project_to_simplex(v, scale=1.): The orthogonal projection of v into the simplex is of form - non_negative_part(v-a) + non_negative_part(v-adjustment) for some a (see) - The function a->sum(non_negative_part(v-a)) is decreasing and convex. - Then we can use a newton's iteration, and piecewise linearity give + The function adjustment->sum(non_negative_part(v-adjustment)) is decreasing and convex. + Then we can use a newton's iteration, and piecewise linearity gives finite convergence. - This is faster than the O(n log(n)) using binary search, + This is faster than the O(n log(1/epsilon)) using binary search, and there exist more complicated O(n) algorithms. """ - a = min(v) - scale - f = sum(non_negative_part(v - a)) - scale + adjustment = np.min(v) - scale + sum_deviation = np.sum(non_negative_part(v - adjustment)) - scale - while f > 1e-8: - diff = v - a - f = non_negative_part(diff).sum() - scale + while sum_deviation > 1e-8: + diff = v - adjustment + sum_deviation = non_negative_part(diff).sum() - scale df = (1.0 * (diff > 0)).sum() #print((a, f, df)) - a += f / (df + 1e-6) + adjustment += sum_deviation / (df + 1e-6) - return non_negative_part(v - a) + return non_negative_part(v - adjustment) def non_negative_part(v): """ A copy of v with negative entries replaced by zero. """ - return (v + abs(v)) / 2 + return (v + np.abs(v)) / 2 def max_length_columns(arr, max_length):