diff --git a/pmesh/_whitenoise_generics.h b/pmesh/_whitenoise_generics.h index 85cf745..1a5d20b 100644 --- a/pmesh/_whitenoise_generics.h +++ b/pmesh/_whitenoise_generics.h @@ -1,3 +1,13 @@ +static int +mkname(_has_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs) +{ + ptrdiff_t irel[3]; + int d; + irel[2] = iabs[2] - self->start[2]; + if(irel[2] >= 0 && irel[2] < self->size[2]) return 1; + return 0; +} + static void mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delta_k, FLOAT re, FLOAT im) { @@ -19,10 +29,46 @@ mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delt static void mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed) { + + clock_t start, end; + double cpu_time_used; + + start = clock(); + /* Fill delta_k with gadget scheme */ - int d; int i, j, k; + int signs[3]; + + { + int compressed = 1; + ptrdiff_t iabs[3] = {self->start[0], self->start[1], 0}; + + /* if no negative k modes are requested, do not work with negative sign; + * this saves half of the computing time. */ + + for(k = self->Nmesh[2] / 2 + 1; k < self->Nmesh[2]; k ++) { + iabs[2] = k; + if (mkname(_has_mode)(self, iabs)) { + compressed = 0; + break; + } + } + printf("compressed = %d\n", compressed); + if (compressed) { + /* only half of the fourier space is requested, ignore the conjugates */ + signs[0] = 1; + signs[1] = 0; + signs[2] = 0; + } else { + /* full fourier space field is requested */ + /* do negative then positive. ordering is import to makesure the positive overwrites nyquist. */ + signs[0] = -1; + signs[1] = 1; + signs[2] = 0; + } + } + gsl_rng * rng = gsl_rng_alloc(gsl_rng_ranlxd1); gsl_rng_set(rng, seed); @@ -46,6 +92,14 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed) } gsl_rng_free(rng); + end = clock(); + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("time used in seeds = %g\n", cpu_time_used); + + start = end; + ptrdiff_t skipped = 0; + ptrdiff_t used = 0; + for(i = self->start[0]; i < self->start[0] + self->size[0]; i ++) { @@ -73,8 +127,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed) d2 = 1; } - int sign; /* sign in the k plane */ - for(sign = -1; sign <= 1; sign += 2) { + int isign; + for(isign = 0; signs[isign] != 0; isign ++) { + int sign = signs[isign]; + unsigned int seed_lower, seed_this; /* the lower quadrant generator */ @@ -103,14 +159,24 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed) SAMPLE(this_rng, &l, &phase); } - /* we want two numbers that are of std ~ 1/sqrt(2) */ - ampl = sqrt(- log(ampl)); + ptrdiff_t iabs[3] = {i, j, k}; - /* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */ - if(self->unitary) - ampl = 1.0; + /* mode is not there, skip it */ + if(!mkname(_has_mode)(self, iabs)) { + skipped ++; + continue; + } else { + used ++; + } - ptrdiff_t iabs[3] = {i, j, k}; + /* we want two numbers that are of std ~ 1/sqrt(2) */ + if(self->unitary) { + /* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */ + ampl = 1.0; + } else { + /* box-mueller */ + ampl = sqrt(- log(ampl)); + } FLOAT re = ampl * cos(phase); FLOAT im = ampl * sin(phase); @@ -154,6 +220,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed) gsl_rng_free(lower_rng); gsl_rng_free(this_rng); } + end = clock(); + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("time used in fill = %g\n", cpu_time_used); + printf("skipped = %td used = %td\n", skipped, used); } /* Footnotes */ diff --git a/pmesh/_whitenoise_imp.c b/pmesh/_whitenoise_imp.c index bc82532..c47324b 100644 --- a/pmesh/_whitenoise_imp.c +++ b/pmesh/_whitenoise_imp.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/pmesh/pm.py b/pmesh/pm.py index 91761e8..683c713 100644 --- a/pmesh/pm.py +++ b/pmesh/pm.py @@ -7,7 +7,7 @@ import numbers # for testing Numbers import warnings - +import functools def is_inplace(out): return out is Ellipsis @@ -412,7 +412,7 @@ def resample(self, out): # ensure the down sample is real for i, slab in zip(complex.slabs.i, complex.slabs): - mask = numpy.bitwise_and.reduce( + mask = functools.reduce(numpy.bitwise_and, [(n - ii) % n == ii for ii, n in zip(i, complex.Nmesh)]) slab.imag[mask] = 0 @@ -420,13 +420,13 @@ def resample(self, out): # remove the nyquist of the output # FIXME: the nyquist is messy due to hermitian constraints # let's do not touch them till we know they are important. - mask = numpy.bitwise_or.reduce( + mask = functools.reduce(numpy.bitwise_or, [ ii == n // 2 for ii, n in zip(i, complex.Nmesh)]) slab[mask] = 0 # also remove the nyquist of the input - mask = numpy.bitwise_or.reduce( + mask = functools.reduce(numpy.bitwise_or, [ ii == n // 2 for ii, n in zip(i, self.Nmesh)]) slab[mask] = 0 @@ -474,7 +474,7 @@ def preview(self, Nmesh=None, axes=None, resampler=None, method=None): if all(Nmesh == self.pm.Nmesh): Nmesh = None if Nmesh is not None: - pm = self.pm.resize(Nmesh) + pm = self.pm.reshape(Nmesh) if method is None: if any(Nmesh < self.pm.Nmesh): method = 'downsample' else : method = 'upsample' @@ -1005,8 +1005,26 @@ class ParticleMesh(object): """ - def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_method='estimate', resampler='cic'): - """ create a PM object. """ + def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', + plan_method='estimate', resampler='cic', transposed=True): + """ create a PM object. + + Parameters + ---------- + transposed : bool + if True, use the transposed data decomposition in fourier space; + transposed data decomposition results faster transforms, + but makes the whitenoise generation in 3d very slow. + plan_method : string + method for planning, `estimate`, `exhaustive`, `measure`. + resampler : string or ResampleWindow + used to determine the default size of the domain decomposition + np : the process mesh + if None, automatically infer -- (n-1)d decomposition on (n)d mesh, + Nmesh : tuple or alike + size of the mesh. len(Nmesh) is the dimension of the system. + + """ if comm is None: comm = MPI.COMM_WORLD @@ -1020,6 +1038,8 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth elif len(Nmesh) == 1: np = [] + self.np = np + if len(np) == len(Nmesh): # only implemented for non-padded and destroy input self._use_padded = False @@ -1065,7 +1085,7 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth _cache_args = (tuple(self.Nmesh), tuple(self.BoxSize), MPI._addressof(comm), comm.rank, comm.size, - tuple(np), self.dtype, plan_method) + tuple(np), self.dtype, plan_method, paddedflag, transposed) template = _pm_cache.get(_cache_args, None) @@ -1084,10 +1104,20 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth else: self.procmesh = pfft.ProcMesh(np, comm=comm) + if transposed: + partition_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag + forward_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag + backward_flags = pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag + else: + partition_flags = paddedflag + forward_flags = paddedflag + backward_flags = paddedflag + + self.transposed = transposed self.partition = pfft.Partition(forward, self.Nmesh, self.procmesh, - pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag) + partition_flags) bufferin = pfft.LocalBuffer(self.partition) bufferout = pfft.LocalBuffer(self.partition) @@ -1106,17 +1136,17 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth else: self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD, bufferin, bufferout, forward, - plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag) + plan_method | forward_flags) self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD, bufferout, bufferin, backward, - plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag) + plan_method | backward_flags) self.ipforward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD, bufferin, bufferin, forward, - plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag) + plan_method | forward_flags) self.ipbackward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD, bufferout, bufferout, backward, - plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag) + plan_method | backward_flags) self.domain = domain.GridND(self.partition.i_edges, comm=self.comm) @@ -1177,27 +1207,39 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth _pm_cache[_cache_args] = self def resize(self, Nmesh): + warnings.warn("ParticleMesh.resize method is deprecated. Use reshape method", DeprecationWarning) + return self.reshape(Nmesh=Nmesh) + + def reshape(self, Nmesh=None, transposed=None): """ - Create a resized ParticleMesh object, changing the resolution Nmesh. + Create a reshaped ParticleMesh object, changing the resolution Nmesh, or even + dimension. Parameters ---------- Nmesh : int or array_like or None The new resolution + transposed : boolean + The new transposed format. Returns ------- - A ParticleMesh of the given resolution. If Nmesh is None - or the same as ``self.Nmesh``, a reference of ``self`` is returned. + A ParticleMesh of the given resolution and transpose property """ if Nmesh is None: Nmesh = self.Nmesh Nmesh_ = self.Nmesh.copy() Nmesh_[...] = Nmesh - if all(self.Nmesh == Nmesh_): return self + + if transposed is None: + transposed = self.transposed return ParticleMesh(BoxSize=self.BoxSize, Nmesh=Nmesh_, - dtype=self.dtype, comm=self.comm) + dtype=self.dtype, + comm=self.comm, + resampler=self.resampler, + np=self.np, + transposed=self.transposed) def create(self, mode, base=None, value=None, zeros=False): """ @@ -1271,7 +1313,7 @@ def generate_whitenoise(self, seed, unitary=False, mean=0, mode='complex', base= # add mean def filter(k, v): - mask = numpy.bitwise_and.reduce([ki == 0 for ki in k]) + mask = functools.reduce(numpy.bitwise_and, [ki == 0 for ki in k]) v[mask] = mean return v diff --git a/pmesh/tests/test_pm.py b/pmesh/tests/test_pm.py index 512f281..5354dba 100644 --- a/pmesh/tests/test_pm.py +++ b/pmesh/tests/test_pm.py @@ -84,6 +84,28 @@ def test_fft(comm): real2 = complex.c2r() assert_almost_equal(numpy.asarray(real), numpy.asarray(real2), decimal=7) +@MPITest(commsize=(1,4)) +def test_whitenoise_untransposed(comm): + pm_ut = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=False) + pm_t = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=True) + + f1 = pm_ut.generate_whitenoise(seed=3333, mode='complex') + f2 = pm_t.generate_whitenoise(seed=3333, mode='complex') + + f1r = numpy.concatenate(comm.allgather(numpy.array(f1.ravel()))) + f2r = numpy.concatenate(comm.allgather(numpy.array(f2.ravel()))) + + assert_array_equal(f1r, f2r) + + # this should have asserted r2c transforms as well. + r1 = pm_ut.generate_whitenoise(seed=3333, mode='real') + r2 = pm_t.generate_whitenoise(seed=3333, mode='real') + + r1r = numpy.concatenate(comm.allgather(numpy.array(r1.ravel()))) + r2r = numpy.concatenate(comm.allgather(numpy.array(r2.ravel()))) + + assert_array_equal(r1r, r2r) + @MPITest(commsize=(1,4)) def test_inplace_fft(comm): pm = ParticleMesh(BoxSize=8.0, Nmesh=[8, 8], comm=comm, dtype='f8')