45 changes: 38 additions & 7 deletions archive/engines/projectional_pycuda_streams.py
@@ -388,6 +388,11 @@ def engine_prepare(self):
prep.err_exit_gpu = gpuarray.to_gpu(prep.err_exit)
if self.do_position_refinement:
prep.error_state_gpu = gpuarray.empty_like(prep.err_fourier_gpu)
kern = self.kernels[prep.label]
if kern.resample:
kern.aux_tmp1_gpu = gpuarray.to_gpu(kern.aux_tmp1)
kern.aux_tmp2_gpu = gpuarray.to_gpu(kern.aux_tmp2)

ma = self.ma.S[dID].data.astype(np.float32)
prep.ma = cuda.pagelocked_empty(ma.shape, ma.dtype, order="C", mem_flags=4)
prep.ma[:] = ma
@@ -505,13 +510,20 @@ def engine_iterate(self, num=1):
pbound = self.pbound_scan[prep.label]
aux = kern.aux
PROP = kern.PROP

# set streams
queue = streamdata.queue
FUK.queue = queue
AWK.queue = queue
POK.queue = queue
PROP.queue = queue
resample = kern.resample
if resample:
ABS_SQR = kern.ABS_SQR
RSMP = kern.RSMP
RSMP.queue = queue
aux_tmp1 = kern.aux_tmp1_gpu
aux_tmp2 = kern.aux_tmp2_gpu

# get addresses and auxilliary array
addr = prep.addr_gpu
@@ -546,7 +558,12 @@ def engine_iterate(self, num=1):
t1 = time.time()
AWK.build_aux_no_ex(aux, addr, ob, pr)
PROP.fw(aux, aux)
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.log_likelihood(aux_tmp2, addr, mag, ma, err_phot, aux_is_intensity=True)
else:
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
self.benchmark.F_LLerror += time.time() - t1

## prep + forward FFT
@@ -561,19 +578,33 @@

## Deviation from measured data
t1 = time.time()
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.fourier_error(aux_tmp2, addr, mag, ma, ma_sum, aux_is_intensity=True)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux_tmp2, addr, mag, ma, err_fourier, pbound, mult=False)
RSMP.resample(aux_tmp1, aux_tmp2)
# aux *= aux_tmp1, but executed on the given stream
aux._elwise_multiply(aux_tmp1, aux, stream=queue)
else:
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
self.benchmark.C_Fourier_update += time.time() - t1
streamdata.record_done_ma(dID)

## Backward FFT
t1 = time.time()
PROP.bw(aux, aux)
## apply changes
#AWK.build_exit(aux, addr, ob, pr, ex, alpha=self.p.alpha)
AWK.make_exit(aux, addr, ob, pr, ex, c_a=self._b, c_po=self._a, c_e=-(self._a + self._b))
FUK.exit_error(aux, addr)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.exit_error(aux_tmp2, addr, aux_is_intensity=True)
else:
FUK.exit_error(aux, addr)
FUK.error_reduce(addr, err_exit)
self.benchmark.E_Build_exit += time.time() - t1

24 changes: 24 additions & 0 deletions ptypy/accelerate/base/array_utils.py
@@ -3,6 +3,7 @@
'''
import numpy as np
from scipy import ndimage as ndi
from ptypy import utils as u


def dot(A, B, acc_dtype=np.float64):
@@ -148,3 +149,26 @@ def crop_pad_2d_simple(A, B):
b1, b2 = B.shape[-2:]
offset = [0, a1 // 2 - b1 // 2, a2 // 2 - b2 // 2]
fill3D(A, B, offset)

def resample(A, B):
"""
Resamples the last two dimensions of B onto the shape of A and places the result in A.
The ratio between the shapes needs to be a power of 2 along the last two dimensions.
upsampling (A larger than B): nearest neighbour interpolation
downsampling (B larger than A): integrate over neighbouring regions
"""
assert A.ndim > 2, "Arrays must have more than 2 dimensions"
assert B.ndim > 2, "Arrays must have more than 2 dimensions"
assert A.shape[:-2] == B.shape[:-2], "Arrays must have the same shape except along the last 2 dimensions"
assert A.shape[-2] == A.shape[-1], "Last two dimensions must be of equal length"
assert B.shape[-2] == B.shape[-1], "Last two dimensions must be of equal length"
# same sampling, no need to call this function
assert A.shape != B.shape, "A and B have the same shape, no need to do any resampling"
# upsampling
if A.shape[-1] > B.shape[-1]:
resample = A.shape[-1] // B.shape[-1]
A[:] = u.repeat_2d(B, resample) / (resample**2)
# downsampling
elif A.shape[-1] < B.shape[-1]:
resample = B.shape[-1] // A.shape[-1]
A[:] = u.rebin_2d(B, resample) * (resample**2)
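For reference, the documented semantics can be reproduced with plain NumPy. This is an illustrative sketch only; the real function delegates to u.repeat_2d and u.rebin_2d and relies on their normalisation:

```python
import numpy as np

def resample_reference(A, B):
    """NumPy-only illustration of resample(): block-spread up, block-integrate down."""
    if A.shape[-1] > B.shape[-1]:              # upsampling
        r = A.shape[-1] // B.shape[-1]
        # spread each pixel over an r x r block, divided so the total is preserved
        A[:] = np.repeat(np.repeat(B, r, axis=-2), r, axis=-1) / r**2
    elif A.shape[-1] < B.shape[-1]:            # downsampling
        r = B.shape[-1] // A.shape[-1]
        # integrate over r x r neighbouring regions
        s = B.shape[:-2] + (A.shape[-2], r, A.shape[-1], r)
        A[:] = B.reshape(s).sum(axis=(-3, -1))

B = np.arange(8, dtype=np.float32).reshape(2, 2, 2)
A = np.zeros((2, 4, 4), dtype=np.float32)
resample_reference(A, B)                       # upsample by a factor of 2
assert np.isclose(A.sum(), B.sum())            # total intensity is preserved either way
```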
39 changes: 36 additions & 3 deletions ptypy/accelerate/base/engines/ML_serial.py
@@ -23,6 +23,7 @@
from ptypy.engines import register
from ptypy.accelerate.base.kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel
from ptypy.accelerate.base import address_manglers
from ptypy.accelerate.base import array_utils as au


__all__ = ['ML_serial']
@@ -90,6 +91,13 @@ def _setup_kernels(self):
kern.a = np.zeros(ash, dtype=np.complex64)
kern.b = np.zeros(ash, dtype=np.complex64)

# create extra array for resampling (if needed)
kern.resample = scan.resample > 1
if kern.resample:
ish = (ash[0],) + tuple(geo.shape//scan.resample)
kern.aux_tmp1 = np.zeros(ash, dtype=np.float32)
kern.aux_tmp2 = np.zeros(ish, dtype=np.float32)

# setup kernels, one for each SCAN.
kern.GDK = GradientDescentKernel(aux, nmodes)
kern.GDK.allocate()
@@ -382,6 +390,13 @@ def prepare(self):
prep = self.engine.diff_info[d.ID]
prep.weights = (self.Irenorm * self.engine.ma.S[d.ID].data
/ (1. / self.Irenorm + d.data)).astype(d.data.dtype)
prep.intensity = self.engine.di.S[d.ID].data
kern = self.engine.kernels[prep.label]
if kern.resample:
prep.weights2 = np.zeros((prep.weights.shape[0],) + kern.aux_tmp1.shape[-2:], dtype=prep.weights.dtype)
au.resample(prep.weights2, prep.weights)
prep.intensity2 = np.zeros((prep.intensity.shape[0],) + kern.aux_tmp1.shape[-2:], dtype=prep.weights.dtype)
au.resample(prep.intensity2, prep.intensity)

def __del__(self):
"""
@@ -423,7 +438,8 @@ def new_grad(self):

# get addresses and auxilliary array
addr = prep.addr
w = prep.weights
w = prep.weights2
I = prep.intensity2
err_phot = prep.err_phot
fic = prep.float_intens_coeff

@@ -432,6 +448,12 @@
obg = ob_grad.S[oID].data
pr = self.engine.pr.S[pID].data
prg = pr_grad.S[pID].data

# resampling
if kern.resample:
aux_tmp1 = kern.aux_tmp1
aux_tmp2 = kern.aux_tmp2

I = prep.I

# make propagated exit (to buffer)
@@ -440,7 +462,13 @@
# forward prop
aux[:] = FW(aux)

GDK.make_model(aux, addr)
if kern.resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
au.resample(aux_tmp1, aux_tmp2)
Review comment (Contributor): this looks odd. Shouldn't the second resample be after make model?

GDK.make_model(aux_tmp1, addr, aux_is_intensity=True)
else:
GDK.make_model(aux, addr)

if self.p.floating_intensities:
GDK.floating_intensity(addr, w, I, fic)
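On the review comment above: binning an intensity down and immediately expanding it back up is not an identity, so whether the second resample happens before or after make_model changes what the model is built from. A small self-contained check (hypothetical helpers mirroring the reference sketch earlier, not ptypy code):

```python
import numpy as np

def down(I, r):
    # integrate r x r blocks: the detector-sampling view of a finer intensity
    s = I.shape[:-2] + (I.shape[-2] // r, r, I.shape[-1] // r, r)
    return I.reshape(s).sum(axis=(-3, -1))

def up(I, r):
    # nearest-neighbour spread over r x r blocks, keeping the total fixed
    return np.repeat(np.repeat(I, r, axis=-2), r, axis=-1) / r**2

I_hi = np.random.default_rng(0).random((1, 8, 8)).astype(np.float32)
I_rt = up(down(I_hi, 2), 2)                 # the down/up round trip done before make_model

print(np.isclose(I_hi.sum(), I_rt.sum()))   # True: the total survives the round trip
print(np.allclose(I_hi, I_rt))              # False: sub-pixel structure is averaged out
```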
@@ -505,14 +533,19 @@ def poly_line_coeffs(self, c_ob_h, c_pr_h):
# get addresses and auxilliary array
addr = prep.addr
w = prep.weights
I = prep.intensity
fic = prep.float_intens_coeff

# local references
ob = self.ob.S[oID].data
ob_h = c_ob_h.S[oID].data
pr = self.pr.S[pID].data
pr_h = c_pr_h.S[pID].data
I = self.di.S[dID].data

# resampling
if kern.resample:
w = prep.weights2
I = prep.intensity2

# make propagated exit (to buffer)
AWK.build_aux_no_ex(f, addr, ob, pr, add=False)
47 changes: 41 additions & 6 deletions ptypy/accelerate/base/engines/projectional_serial.py
@@ -178,8 +178,18 @@ def _setup_kernels(self):
aux = np.zeros(ash, dtype=np.complex64)
kern.aux = aux

# create extra array for resampling (if needed)
kern.resample = scan.resample > 1
if kern.resample:
ish = (ash[0],) + tuple(geo.shape//scan.resample)
kern.aux_tmp1 = np.zeros(ash, dtype=np.float32)
kern.aux_tmp2 = np.zeros(ish, dtype=np.float32)
aux_f = kern.aux_tmp2 # making sure to pass the correct shapes to FUK
else:
aux_f = aux

# setup kernels, one for each SCAN.
kern.FUK = FourierUpdateKernel(aux, nmodes)
kern.FUK = FourierUpdateKernel(aux_f, nmodes)
kern.FUK.allocate()

kern.POK = PoUpdateKernel()
@@ -266,6 +276,12 @@ def engine_iterate(self, num=1):
pbound = self.pbound_scan[prep.label]
aux = kern.aux

# resampling
resample = kern.resample
if resample:
aux_tmp1 = kern.aux_tmp1
aux_tmp2 = kern.aux_tmp2

# local references
ma = prep.ma
ob = self.ob.S[oID].data
@@ -277,7 +293,12 @@
t1 = time.time()
AWK.build_aux_no_ex(aux, addr, ob, pr)
aux[:] = FW(aux)
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.log_likelihood(aux_tmp2, addr, mag, ma, err_phot, aux_is_intensity=True)
else:
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
self.benchmark.F_LLerror += time.time() - t1

## build auxilliary wave
@@ -292,9 +313,18 @@

## Deviation from measured data
t1 = time.time()
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.fourier_error(aux_tmp2, addr, mag, ma, ma_sum, aux_is_intensity=True)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux_tmp2, addr, mag, ma, err_fourier, pbound, mult=False)
au.resample(aux_tmp1, aux_tmp2)
aux *= aux_tmp1
else:
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
self.benchmark.C_Fourier_update += time.time() - t1

## backward FFT
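The resample branch above follows a fixed pattern: form the intensity on the fine grid, bin it down to the detector sampling for fourier_error and fmag_all_update, then bring the multiplicative correction back up and apply it to the complex wave. A self-contained NumPy toy of that pattern (simplified stand-ins for the FUK calls; mask, pbound and ptypy's exact resampling normalisation are left out):

```python
import numpy as np

r = 2                                              # upsample factor (detector is r x coarser)
rng = np.random.default_rng(0)
f = (rng.standard_normal((1, 8, 8))
     + 1j * rng.standard_normal((1, 8, 8))).astype(np.complex64)   # fine-grid farfield wave
mag = rng.random((1, 4, 4)).astype(np.float32) + 0.5               # measured magnitudes

# 1) intensity on the fine grid
I_fine = np.abs(f)**2

# 2) bin down to detector sampling (sum over r x r blocks)
I_det = I_fine.reshape(1, 4, r, 4, r).sum(axis=(-3, -1))

# 3) renormalisation factor at detector sampling
#    (stand-in for fmag_all_update with mult=False)
fm = mag / np.sqrt(I_det + 1e-10)

# 4) broadcast the factor back to the fine grid and apply it to the wave
#    (the PR routes this through au.resample instead of np.repeat)
f *= np.repeat(np.repeat(fm, r, axis=-2), r, axis=-1)

# after renormalisation, the binned intensity reproduces the measurement
I_new = (np.abs(f)**2).reshape(1, 4, r, 4, r).sum(axis=(-3, -1))
print(np.allclose(np.sqrt(I_new), mag, rtol=1e-4))   # True
```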
@@ -305,7 +335,12 @@
## build exit wave
t1 = time.time()
AWK.make_exit(aux, addr, ob, pr, ex, c_a=self._b, c_po=self._a, c_e=-(self._a+self._b))
FUK.exit_error(aux,addr)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.exit_error(aux_tmp2, addr, aux_is_intensity=True)
else:
FUK.exit_error(aux,addr)
FUK.error_reduce(addr, err_exit)
self.benchmark.E_Build_exit += time.time() - t1
