From f13d01d72397dbe3bbedb9c8b8dba2ad9e6ce525 Mon Sep 17 00:00:00 2001 From: Binary Date: Tue, 10 Sep 2019 16:52:49 +0200 Subject: [PATCH 1/2] Use generators instead of pre-computing all the samples in memory. This helps to save a huge amount of RAM while preserving performance. --- bootstrapped/bootstrap.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/bootstrapped/bootstrap.py b/bootstrapped/bootstrap.py index 807aa4b..eed1015 100644 --- a/bootstrapped/bootstrap.py +++ b/bootstrapped/bootstrap.py @@ -9,6 +9,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals +from itertools import izip import numpy as _np import multiprocessing as _multiprocessing @@ -180,13 +181,14 @@ def _generate_distributions(values_lists, num_iterations): else: values_shape = values_lists[0].shape[0] - ids = _np.random.choice( - values_shape, - (num_iterations, values_shape), - replace=True, - ) - results = [values[ids] for values in values_lists] + results = ( + ( + values[_np.random.choice( + values_shape, values_shape, replace=True + )] for _ in xrange(num_iterations) + ) for values in values_lists + ) return results @@ -209,8 +211,11 @@ def _bootstrap_sim(values_lists, stat_func_lists, num_iterations, values_sims = _generate_distributions(values_lists, max_rng) - for i, values_sim, stat_func in zip(range(len(values_sims)), values_sims, stat_func_lists): - results[i].extend(stat_func(values_sim)) + for i, (values_sim, stat_func) in enumerate(izip( + values_sims, stat_func_lists)): + results[i].extend( + stat_func(row) for row in values_sim + ) return _np.array(results) From 482ae60bf14255766c0b2e1648c6b1c9e69631d4 Mon Sep 17 00:00:00 2001 From: Binary Date: Tue, 10 Sep 2019 22:50:02 +0200 Subject: [PATCH 2/2] Tests now pass on Python 3 --- bootstrapped/bootstrap.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bootstrapped/bootstrap.py 
b/bootstrapped/bootstrap.py index eed1015..d2ede3e 100644 --- a/bootstrapped/bootstrap.py +++ b/bootstrapped/bootstrap.py @@ -9,7 +9,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals -from itertools import izip +try: + from itertools import izip +except ImportError: + izip = zip import numpy as _np import multiprocessing as _multiprocessing @@ -186,7 +189,7 @@ def _generate_distributions(values_lists, num_iterations): ( values[_np.random.choice( values_shape, values_shape, replace=True - )] for _ in xrange(num_iterations) + )] for _ in range(num_iterations) ) for values in values_lists ) return results