diff --git a/pmesh/_whitenoise_generics.h b/pmesh/_whitenoise_generics.h
index 85cf745..1a5d20b 100644
--- a/pmesh/_whitenoise_generics.h
+++ b/pmesh/_whitenoise_generics.h
@@ -1,3 +1,13 @@
+static int
+mkname(_has_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs)
+{
+    ptrdiff_t irel[3];
+    int d;
+    irel[2] = iabs[2] - self->start[2];
+    if(irel[2] >= 0 && irel[2] < self->size[2]) return 1;
+    return 0;
+}
+
 static void
 mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delta_k, FLOAT re, FLOAT im)
 {
@@ -19,10 +29,46 @@ mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delt
 static void
 mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
 {
+
+    clock_t start, end;
+    double cpu_time_used;
+
+    start = clock();
+
     /* Fill delta_k with gadget scheme */
-    int d;
     int i, j, k;
 
+    int signs[3];
+
+    {
+        int compressed = 1;
+        ptrdiff_t iabs[3] = {self->start[0], self->start[1], 0};
+
+        /* if no negative k modes are requested, do not work with negative sign;
+         * this saves half of the computing time. */
+
+        for(k = self->Nmesh[2] / 2 + 1; k < self->Nmesh[2]; k ++) {
+            iabs[2] = k;
+            if (mkname(_has_mode)(self, iabs)) {
+                compressed = 0;
+                break;
+            }
+        }
+        printf("compressed = %d\n", compressed);
+        if (compressed) {
+            /* only half of the fourier space is requested, ignore the conjugates */
+            signs[0] = 1;
+            signs[1] = 0;
+            signs[2] = 0;
+        } else {
+            /* full fourier space field is requested */
+            /* do negative then positive. ordering is import to makesure the positive overwrites nyquist. */
+            signs[0] = -1;
+            signs[1] = 1;
+            signs[2] = 0;
+        }
+    }
+
     gsl_rng * rng = gsl_rng_alloc(gsl_rng_ranlxd1);
     gsl_rng_set(rng, seed);
 
@@ -46,6 +92,14 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
     }
     gsl_rng_free(rng);
 
+    end = clock();
+    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("time used in seeds = %g\n", cpu_time_used);
+
+    start = end;
+    ptrdiff_t skipped = 0;
+    ptrdiff_t used = 0;
+
     for(i = self->start[0];
         i < self->start[0] + self->size[0];
         i ++) {
@@ -73,8 +127,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
                 d2 = 1;
             }
 
-            int sign;   /* sign in the k plane */
-            for(sign = -1; sign <= 1; sign += 2) {
+            int isign;
+            for(isign = 0; signs[isign] != 0; isign ++) {
+                int sign = signs[isign];
+
                 unsigned int seed_lower, seed_this;
 
                 /* the lower quadrant generator */
@@ -103,14 +159,24 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
                         SAMPLE(this_rng, &ampl, &phase);
                     }
 
-                    /* we want two numbers that are of std ~ 1/sqrt(2) */
-                    ampl = sqrt(- log(ampl));
+                    ptrdiff_t iabs[3] = {i, j, k};
 
-                    /* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */
-                    if(self->unitary)
-                        ampl = 1.0;
+                    /* mode is not there, skip it */
+                    if(!mkname(_has_mode)(self, iabs)) {
+                        skipped ++;
+                        continue;
+                    } else {
+                        used ++;
+                    }
 
-                    ptrdiff_t iabs[3] = {i, j, k};
+                    /* we want two numbers that are of std ~ 1/sqrt(2) */
+                    if(self->unitary) {
+                        /* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */
+                        ampl = 1.0;
+                    } else {
+                        /* box-mueller */
+                        ampl = sqrt(- log(ampl));
+                    }
 
                     FLOAT re = ampl * cos(phase);
                     FLOAT im = ampl * sin(phase);
@@ -154,6 +220,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
         gsl_rng_free(lower_rng);
         gsl_rng_free(this_rng);
     }
+    end = clock();
+    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("time used in fill = %g\n", cpu_time_used);
+    printf("skipped = %td used = %td\n", skipped, used);
 }
 
 /* Footnotes */ 
diff --git a/pmesh/_whitenoise_imp.c b/pmesh/_whitenoise_imp.c
index bc82532..c47324b 100644
--- a/pmesh/_whitenoise_imp.c
+++ b/pmesh/_whitenoise_imp.c
@@ -2,6 +2,7 @@
 #include <stdint.h>
 #include <string.h>
 #include <math.h>
+#include <time.h>
 
 #include <gsl/config.h>
 #include <gsl/gsl_rng.h>
diff --git a/pmesh/pm.py b/pmesh/pm.py
index 91761e8..683c713 100644
--- a/pmesh/pm.py
+++ b/pmesh/pm.py
@@ -7,7 +7,7 @@
 
 import numbers # for testing Numbers
 import warnings
-
+import functools
 def is_inplace(out):
     return out is Ellipsis
 
@@ -412,7 +412,7 @@ def resample(self, out):
 
         # ensure the down sample is real
         for i, slab in zip(complex.slabs.i, complex.slabs):
-            mask = numpy.bitwise_and.reduce(
+            mask = functools.reduce(numpy.bitwise_and,
                  [(n - ii) % n == ii
                     for ii, n in zip(i, complex.Nmesh)])
             slab.imag[mask] = 0
@@ -420,13 +420,13 @@ def resample(self, out):
             # remove the nyquist of the output
             # FIXME: the nyquist is messy due to hermitian constraints
             # let's do not touch them till we know they are important.
-            mask = numpy.bitwise_or.reduce(
+            mask = functools.reduce(numpy.bitwise_or,
                  [ ii == n // 2
                    for ii, n in zip(i, complex.Nmesh)])
             slab[mask] = 0
 
             # also remove the nyquist of the input
-            mask = numpy.bitwise_or.reduce(
+            mask = functools.reduce(numpy.bitwise_or,
                  [ ii == n // 2
                    for ii, n in zip(i, self.Nmesh)])
             slab[mask] = 0
@@ -474,7 +474,7 @@ def preview(self, Nmesh=None, axes=None, resampler=None, method=None):
             if all(Nmesh == self.pm.Nmesh): Nmesh = None
 
         if Nmesh is not None:
-            pm = self.pm.resize(Nmesh)
+            pm = self.pm.reshape(Nmesh)
             if method is None:
                 if any(Nmesh < self.pm.Nmesh): method = 'downsample'
                 else : method = 'upsample'
@@ -1005,8 +1005,26 @@ class ParticleMesh(object):
 
     """
 
-    def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_method='estimate', resampler='cic'):
-        """ create a PM object.  """
+    def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8',
+                    plan_method='estimate', resampler='cic', transposed=True):
+        """ create a PM object.  
+
+            Parameters
+            ----------
+            transposed : bool
+                if True, use the transposed data decomposition in fourier space;
+                transposed data decomposition results faster transforms, 
+                but makes the whitenoise generation in 3d very slow.
+            plan_method : string
+                method for planning, `estimate`, `exhaustive`, `measure`.
+            resampler : string or ResampleWindow
+                used to determine the default size of the domain decomposition
+            np : the process mesh
+                if None, automatically infer -- (n-1)d decomposition on (n)d mesh,
+            Nmesh : tuple or alike
+                size of the mesh. len(Nmesh) is the dimension of the system.
+
+        """
         if comm is None:
             comm = MPI.COMM_WORLD
 
@@ -1020,6 +1038,8 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
             elif len(Nmesh) == 1:
                 np = []
 
+        self.np = np
+
         if len(np) == len(Nmesh):
             # only implemented for non-padded and destroy input
             self._use_padded = False
@@ -1065,7 +1085,7 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
 
         _cache_args = (tuple(self.Nmesh), tuple(self.BoxSize),
                        MPI._addressof(comm), comm.rank, comm.size,
-                       tuple(np), self.dtype, plan_method)
+                       tuple(np), self.dtype, plan_method, paddedflag, transposed)
 
         template = _pm_cache.get(_cache_args, None)
 
@@ -1084,10 +1104,20 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
         else:
             self.procmesh = pfft.ProcMesh(np, comm=comm)
 
+        if transposed:
+            partition_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag
+            forward_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag
+            backward_flags = pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag
+        else:
+            partition_flags = paddedflag
+            forward_flags = paddedflag
+            backward_flags = paddedflag
+
+        self.transposed = transposed
         self.partition = pfft.Partition(forward,
             self.Nmesh,
             self.procmesh,
-            pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
+            partition_flags)
 
         bufferin = pfft.LocalBuffer(self.partition)
         bufferout = pfft.LocalBuffer(self.partition)
@@ -1106,17 +1136,17 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
         else:
             self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
                     bufferin, bufferout, forward,
-                    plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
+                    plan_method | forward_flags)
             self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
                     bufferout, bufferin, backward,
-                    plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag)
+                    plan_method  | backward_flags)
 
             self.ipforward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
                     bufferin, bufferin, forward,
-                    plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
+                    plan_method | forward_flags)
             self.ipbackward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
                     bufferout, bufferout, backward,
-                    plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag)
+                    plan_method | backward_flags)
 
         self.domain = domain.GridND(self.partition.i_edges, comm=self.comm)
 
@@ -1177,27 +1207,39 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
         _pm_cache[_cache_args] = self
 
     def resize(self, Nmesh):
+        warnings.warn("ParticleMesh.resize method is deprecated. Use reshape method", DeprecationWarning)
+        return self.reshape(Nmesh=Nmesh)
+
+    def reshape(self, Nmesh=None, transposed=None):
         """
-            Create a resized ParticleMesh object, changing the resolution Nmesh.
+            Create a reshaped ParticleMesh object, changing the resolution Nmesh, or even
+            dimension.
 
             Parameters
             ----------
             Nmesh : int or array_like or None
                 The new resolution
+            transposed : boolean
+                The new transposed format.
 
             Returns
             -------
-            A ParticleMesh of the given resolution. If Nmesh is None
-            or the same as ``self.Nmesh``, a reference of ``self`` is returned.
+            A ParticleMesh of the given resolution and transpose property
         """
         if Nmesh is None: Nmesh = self.Nmesh
         Nmesh_ = self.Nmesh.copy()
         Nmesh_[...] = Nmesh
-        if all(self.Nmesh == Nmesh_): return self
+
+        if transposed is None:
+            transposed = self.transposed
 
         return ParticleMesh(BoxSize=self.BoxSize,
                             Nmesh=Nmesh_,
-                            dtype=self.dtype, comm=self.comm)
+                            dtype=self.dtype,
+                            comm=self.comm,
+                            resampler=self.resampler,
+                            np=self.np,
+                            transposed=self.transposed)
 
     def create(self, mode, base=None, value=None, zeros=False):
         """
@@ -1271,7 +1313,7 @@ def generate_whitenoise(self, seed, unitary=False, mean=0, mode='complex', base=
 
         # add mean
         def filter(k, v):
-            mask = numpy.bitwise_and.reduce([ki == 0 for ki in k])
+            mask = functools.reduce(numpy.bitwise_and, [ki == 0 for ki in k])
             v[mask] = mean
             return v
 
diff --git a/pmesh/tests/test_pm.py b/pmesh/tests/test_pm.py
index 512f281..5354dba 100644
--- a/pmesh/tests/test_pm.py
+++ b/pmesh/tests/test_pm.py
@@ -84,6 +84,28 @@ def test_fft(comm):
     real2 = complex.c2r()
     assert_almost_equal(numpy.asarray(real), numpy.asarray(real2), decimal=7)
 
+@MPITest(commsize=(1,4))
+def test_whitenoise_untransposed(comm):
+    pm_ut = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=False)
+    pm_t = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=True)
+
+    f1 = pm_ut.generate_whitenoise(seed=3333, mode='complex')
+    f2 = pm_t.generate_whitenoise(seed=3333, mode='complex')
+
+    f1r = numpy.concatenate(comm.allgather(numpy.array(f1.ravel())))
+    f2r = numpy.concatenate(comm.allgather(numpy.array(f2.ravel())))
+
+    assert_array_equal(f1r, f2r)
+
+    # this should have asserted r2c transforms as well.
+    r1 = pm_ut.generate_whitenoise(seed=3333, mode='real')
+    r2 = pm_t.generate_whitenoise(seed=3333, mode='real')
+
+    r1r = numpy.concatenate(comm.allgather(numpy.array(r1.ravel())))
+    r2r = numpy.concatenate(comm.allgather(numpy.array(r2.ravel())))
+
+    assert_array_equal(r1r, r2r)
+
 @MPITest(commsize=(1,4))
 def test_inplace_fft(comm):
     pm = ParticleMesh(BoxSize=8.0, Nmesh=[8, 8], comm=comm, dtype='f8')