diff --git a/ptypy/custom/LBFGS.py b/ptypy/custom/LBFGS.py new file mode 100644 index 000000000..673f58600 --- /dev/null +++ b/ptypy/custom/LBFGS.py @@ -0,0 +1,383 @@ +# -*- coding: utf-8 -*- +""" +Limited-memory BFGS reconstruction engine. + +This file is part of the PTYPY package. + + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import numpy as np +import time + +#from ptypy import utils as u +from ptypy.utils.verbose import logger +#from ptypy.utils import parallel +from ptypy.engines.utils import Cnorm2, Cdot +from ptypy.engines import register +from ptypy.engines.ML import ML +from ptypy.core.manager import Full, Vanilla, Bragg3dModel, BlockVanilla, BlockFull, GradFull, BlockGradFull + + +__all__ = ['LBFGS'] + + +@register() +class LBFGS(ML): + """ + Limited-memory BFGS reconstruction engine. + + + Defaults: + + [name] + default = LBFGS + type = str + help = + doc = + + [ML_type] + default = 'gaussian' + type = str + help = Likelihood model + choices = ['gaussian','poisson','euclid'] + doc = One of ‘gaussian’, poisson’ or ‘euclid’. Only 'gaussian' is implemented. + + [floating_intensities] + default = False + type = bool + help = Adaptive diffraction pattern rescaling + doc = If True, allow for adaptative rescaling of the diffraction pattern intensities (to correct for incident beam intensity fluctuations). + + [intensity_renormalization] + default = 1. + type = float + lowlim = 0.0 + help = Rescales the intensities so they can be interpreted as Poisson counts. + + [reg_del2] + default = False + type = bool + help = Whether to use a Gaussian prior (smoothing) regularizer + + [reg_del2_amplitude] + default = .01 + type = float + lowlim = 0.0 + help = Amplitude of the Gaussian prior if used + + [smooth_gradient] + default = 0.0 + type = float + help = Smoothing preconditioner + doc = Sigma for gaussian filter (turned off if 0.) + + [smooth_gradient_decay] + default = 0. + type = float + help = Decay rate for smoothing preconditioner + doc = Sigma for gaussian filter will reduce exponentially at this rate + + [scale_precond] + default = False + type = bool + help = Whether to use the object/probe scaling preconditioner + doc = This parameter can give faster convergence for weakly scattering samples. + + [scale_probe_object] + default = 1. + type = float + lowlim = 0.0 + help = Relative scale of probe to object + + [probe_update_start] + default = 2 + type = int + lowlim = 0 + help = Number of iterations before probe update starts + + [wavefield_precond] + default = False + type = bool + help = Whether to use the wavefield preconditioner + doc = This parameter can give faster convergence. + + [wavefield_delta_object] + default = 0.1 + type = float + help = Wavefield preconditioner damping constant for the object. + + [wavefield_delta_probe] + default = 0.1 + type = float + help = Wavefield preconditioner damping constant for the probe. + + [bfgs_memory_size] + default = 5 + type = int + lowlim = 2 + help = Number of BFGS updates to store + """ + + SUPPORTED_MODELS = [Full, Vanilla, Bragg3dModel, BlockVanilla, BlockFull, GradFull, BlockGradFull] + + def __init__(self, ptycho_parent, pars=None): + """ + Limited-memory BFGS reconstruction engine. + """ + super().__init__(ptycho_parent, pars) + + # Memory of object updates and gradient differences + self.ob_s = [None]*self.p.bfgs_memory_size + self.ob_y = [None]*self.p.bfgs_memory_size + + # Memory of probe updates and gradient differences + self.pr_s = [None]*self.p.bfgs_memory_size + self.pr_y = [None]*self.p.bfgs_memory_size + + # Other BFGS memories + self.rho = np.zeros(self.p.bfgs_memory_size) + self.alpha = np.zeros(self.p.bfgs_memory_size) + + self.ptycho.citations.add_article( + title='L-BFGS for Ptychography paper', + author='Fowkes J. and Daurer B.', + journal='TBA', + volume=None, + year=None, + page=None, + doi='TBA', + comment='The L-BFGS reconstruction algorithm', + ) + + def engine_initialize(self): + """ + Prepare for reconstruction. + """ + super().engine_initialize() + + # Create containers for memories of updates and gradient differences + for i in range(self.p.bfgs_memory_size): + self.ob_s[i] = self.ob.copy(self.ob.ID + '_s' + str(i), fill=0.) + self.ob_y[i] = self.ob.copy(self.ob.ID + '_y' + str(i), fill=0.) + self.pr_s[i] = self.pr.copy(self.pr.ID + '_s' + str(i), fill=0.) + self.pr_y[i] = self.pr.copy(self.pr.ID + '_y' + str(i), fill=0.) + + + + def engine_prepare(self): + """ + Last minute initialization, everything, that needs to be recalculated, + when new data arrives. + """ + super().engine_prepare() + + def engine_iterate(self, num=1): + """ + Compute `num` iterations. + """ + tg = 0. + tc = 0. + ta = time.time() + for it in range(num): + ####################################### + # Compute new gradient (same as ML) + ####################################### + t1 = time.time() + error_dct = self.ML_model.new_grad() + new_ob_grad, new_pr_grad = self.ob_grad_new, self.pr_grad_new + tg += time.time() - t1 + + if self.p.probe_update_start <= self.curiter: + # Apply probe support if needed + for name, s in new_pr_grad.storages.items(): + self.support_constraint(s) + #support = self.probe_support.get(name) + #if support is not None: + # s.data *= support + else: + new_pr_grad.fill(0.) + + # probe/object rescaling (not used for now) + if self.p.scale_precond: + cn2_new_pr_grad = Cnorm2(new_pr_grad) + cn2_new_ob_grad = Cnorm2(new_ob_grad) + if cn2_new_pr_grad > 1e-5: + scale_p_o = (self.p.scale_probe_object * cn2_new_ob_grad + / cn2_new_pr_grad) + else: + scale_p_o = self.p.scale_probe_object + if self.scale_p_o is None: + self.scale_p_o = scale_p_o + else: + self.scale_p_o = self.scale_p_o ** self.scale_p_o_memory + self.scale_p_o *= scale_p_o ** (1-self.scale_p_o_memory) + logger.debug('Scale P/O: %6.3g' % scale_p_o) + else: + self.scale_p_o = self.p.scale_probe_object + + ############################ + # LBFGS Two Loop Recursion + ############################ + if self.curiter == 0: # Initial steepest-descent step + + # Wavefield preconditioner (applied twice) + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + new_ob_grad.storages[name].data /= self.ob_fln.storages[name].data + self.p.wavefield_delta_object + new_pr_grad.storages[name].data /= self.pr_fln.storages[name].data + self.p.wavefield_delta_probe + + # Object steepest-descent step + self.ob_h -= new_ob_grad + + # Probe steepest-descent step + new_pr_grad *= self.scale_p_o # probe preconditioning (applied twice) + self.pr_h -= new_pr_grad + + else: # Two-loop LBFGS recursion + + # Wavefield preconditioner + if self.p.wavefield_precond: + for name, s in self.ob_h.storages.items(): + self.ob_h.storages[name].data *= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + self.pr_h.storages[name].data *= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + for name, s in new_ob_grad.storages.items(): + new_ob_grad.storages[name].data /= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + new_pr_grad.storages[name].data /= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + + # Memory index + mi = min(self.curiter,self.p.bfgs_memory_size) + + # Remember last object update and gradient difference + self.ob_s[mi-1] << self.ob_h + self.ob_y[mi-1] << new_ob_grad + self.ob_y[mi-1] -= self.ob_grad + + # Remember last probe update and gradient difference + self.pr_h /= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_s[mi-1] << self.pr_h + new_pr_grad *= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_y[mi-1] << new_pr_grad + self.pr_y[mi-1] -= self.pr_grad + + # Compute and store rho + self.rho[mi-1] = 1. / ( np.real(Cdot(self.ob_y[mi-1], self.ob_s[mi-1])) + + np.real(Cdot(self.pr_y[mi-1], self.pr_s[mi-1])) ) + # BFGS update + self.ob_h << new_ob_grad + self.pr_h << new_pr_grad + # Compute right-hand side of BGFS product + for i in reversed(range(mi)): + self.alpha[i] = self.rho[i]*( np.real(Cdot(self.ob_s[i], self.ob_h)) + + np.real(Cdot(self.pr_s[i], self.pr_h)) ) + + # self.ob_h -= self.alpha[i]*self.ob_y[i] + self.ob_h /= self.alpha[i] + self.ob_h -= self.ob_y[i] + self.ob_h *= self.alpha[i] + # self.pr_h -= self.alpha[i]*self.pr_y[i] + self.pr_h /= self.alpha[i] + self.pr_h -= self.pr_y[i] + self.pr_h *= self.alpha[i] + + # Compute centre of BFGS product (scaled identity) + c_num = ( np.real(Cdot(self.ob_s[mi-1], self.ob_y[mi-1])) + + np.real(Cdot(self.pr_s[mi-1], self.pr_y[mi-1])) ) + c_denom = Cnorm2(self.ob_y[mi-1]) + Cnorm2(self.pr_y[mi-1]) + self.ob_h *= (c_num/c_denom) + self.pr_h *= (c_num/c_denom) + + + # Compute left-hand side of BFGS product + for i in range(mi): + beta = self.rho[i]*( np.real(Cdot(self.ob_y[i], self.ob_h)) + + np.real(Cdot(self.pr_y[i], self.pr_h)) ) + + # self.ob_h += (self.alpha[i]-beta)*self.ob_s[i] + self.ob_h /= (self.alpha[i]-beta) + self.ob_h += self.ob_s[i] + self.ob_h *= (self.alpha[i]-beta) + # self.pr_h += (self.alpha[i]-beta)*self.pr_s[i] + self.pr_h /= (self.alpha[i]-beta) + self.pr_h += self.pr_s[i] + self.pr_h *= (self.alpha[i]-beta) + + # Wavefield preconditioner + if self.p.wavefield_precond: + for name, s in self.ob_h.storages.items(): + self.ob_h.storages[name].data /= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + self.pr_h.storages[name].data /= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + + # Flip step direction for minimisation + self.ob_h *= -1 + self.pr_h *= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_h *= -1 + + # update current gradients with new gradients + self.ob_grad << new_ob_grad + self.pr_grad << new_pr_grad + + # linesearch (same as ML) + t2 = time.time() + B = self.ML_model.poly_line_coeffs(self.ob_h, self.pr_h) + tc += time.time() - t2 + + + if np.isinf(B).any() or np.isnan(B).any(): + logger.warning( + 'Warning! inf or nan found! Trying to continue...') + B[np.isinf(B)] = 0. + B[np.isnan(B)] = 0. + + dt = self.ptycho.FType + self.tmin = dt(-.5 * B[1] / B[2]) + + # step update + self.ob_h *= self.tmin + self.pr_h *= self.tmin + self.ob += self.ob_h + self.pr += self.pr_h + + # Roll memory for overwriting + if self.curiter >= self.p.bfgs_memory_size: + self.ob_s.append(self.ob_s.pop(0)) + self.pr_s.append(self.pr_s.pop(0)) + self.ob_y.append(self.ob_y.pop(0)) + self.pr_y.append(self.pr_y.pop(0)) + self.rho = np.roll(self.rho,-1) + + # Position correction + self.position_update() + + # Allow for customized modifications at the end of each iteration + self._post_iterate_update() + + # increase iteration counter + self.curiter +=1 + + logger.info('Time spent in gradient calculation: %.2f' % tg) + logger.info(' .... in coefficient calculation: %.2f' % tc) + return error_dct # np.array([[self.ML_model.LL[0]] * 3]) + + def _post_iterate_update(self): + """ + Enables modification at the end of each LBFGS iteration. + """ + pass + + def engine_finalize(self): + """ + Delete temporary containers. + """ + super().engine_finalize() + + # Delete containers for memories of updates and gradient differences + for i in reversed(range(self.p.bfgs_memory_size)): + del self.ptycho.containers[self.ob_s[i].ID] + del self.ob_s[i] + del self.ptycho.containers[self.ob_y[i].ID] + del self.ob_y[i] + del self.ptycho.containers[self.pr_s[i].ID] + del self.pr_s[i] + del self.ptycho.containers[self.pr_y[i].ID] + del self.pr_y[i] diff --git a/ptypy/custom/LBFGS_cupy.py b/ptypy/custom/LBFGS_cupy.py new file mode 100644 index 000000000..564a977b3 --- /dev/null +++ b/ptypy/custom/LBFGS_cupy.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +Limited-memory BFGS reconstruction engine. + +TODO. + + * Implement other regularizers + +This file is part of the PTYPY package. + + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import numpy as np +import cupy as cp + +from ptypy.engines import register +from ptypy.custom.LBFGS_serial import LBFGS_serial +from ptypy.accelerate.base.engines.ML_serial import ML_serial, BaseModelSerial +from ptypy.accelerate.cuda_cupy.engines.ML_cupy import ML_cupy, GaussianModel +from ptypy import utils as u +from ptypy.utils.verbose import logger, log +from ptypy.utils import parallel +from ptypy.accelerate.cuda_cupy import get_context +from ptypy.accelerate.cuda_cupy.kernels import PropagationKernel, RealSupportKernel, FourierSupportKernel +from ptypy.accelerate.cuda_cupy.kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel +from ptypy.accelerate.cuda_cupy.array_utils import ArrayUtilsKernel, DerivativesKernel, FFTGaussianSmoothingKernel, TransposeKernel +from ptypy.accelerate.cuda_cupy.mem_utils import GpuDataManager +from ptypy.accelerate.base import address_manglers + +__all__ = ['LBFGS_cupy'] + +MAX_BLOCKS = 99999 # can be used to limit the number of blocks, simulating that they don't fit +#MAX_BLOCKS = 3 # can be used to limit the number of blocks, simulating that they don't fit + +@register() +class LBFGS_cupy(LBFGS_serial, ML_cupy): + + """ + Defaults: + + [probe_update_cuda_atomics] + default = False + type = bool + help = For GPU, use the atomics version for probe update kernel + + [object_update_cuda_atomics] + default = True + type = bool + help = For GPU, use the atomics version for object update kernel + + [use_cuda_device_memory_pool] + default = True + type = bool + help = For GPU, use a device memory pool + + [fft_lib] + default = reikna + type = str + help = Choose the pycuda-compatible FFT module. + doc = One of: + - ``'reikna'`` : the reikna packaga (fast load, competitive compute for streaming) + - ``'cuda'`` : ptypy's cuda wrapper (delayed load, but fastest compute if all data is on GPU) + - ``'skcuda'`` : scikit-cuda (fast load, slowest compute due to additional store/load stages) + choices = 'reikna','cuda','skcuda' + userlevel = 2 + """ + + def __init__(self, ptycho_parent, pars=None): + """ + Limited-memory BFGS reconstruction engine. + """ + super().__init__(ptycho_parent, pars) + + def engine_initialize(self): + """ + Prepare for LBFGS reconstruction. + """ + super().engine_initialize() + + def engine_prepare(self): + super().engine_prepare() + + def _initialize_model(self): + + # Create noise model + if self.p.ML_type.lower() == "gaussian": + self.ML_model = GaussianModel(self) + elif self.p.ML_type.lower() == "poisson": + raise NotImplementedError('Poisson norm model not yet implemented') + elif self.p.ML_type.lower() == "euclid": + raise NotImplementedError('Euclid norm model not yet implemented') + else: + raise RuntimeError("Unsupported ML_type: '%s'" % self.p.ML_type) + + def _get_smooth_gradient(self, data, sigma): + if self.p.smooth_gradient_method == "convolution": + if self.GSK.tmp is None: + self.GSK.tmp = cp.empty(data.shape, dtype=np.complex64) + try: + self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp) + except MemoryError: + raise RuntimeError("Convolution kernel too large for direct convolution on GPU", + "Please reduce parameter smooth_gradient or set smooth_gradient_method='fft'.") + elif self.p.smooth_gradient_method == "fft": + self.FGSK.filter(data, sigma) + else: + raise NotImplementedError("smooth_gradient_method should be ```convolution``` or ```fft```.") + return data + + def _smooth_gradient(self): + new_ob_grad = self.ob_grad_new + # Smoothing preconditioner decay (once per iteration) + if self.smooth_gradient: + self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) + for name, s in new_ob_grad.storages.items(): + s.gpu = self._get_smooth_gradient(s.gpu, self.smooth_gradient.sigma) + + def _get_ob_norm(self): + norm = self._get_norm(self.ob_grad_new) + return norm + + def _get_pr_norm(self): + norm = self._get_norm(self.pr_grad_new) + return norm + + def _get_dot(self, c1, c2): + dot = np.double(0.) + for name, s2 in c2.storages.items(): + s1 = c1.storages[name] + dot += self._dot_kernel(s2.gpu, s1.gpu).get()[0] + return dot + + def _get_norm(self, c): + norm = np.double(0.) + for name, s in c.storages.items(): + norm += self._dot_kernel(s.gpu, s.gpu).get()[0] + return norm + + def _replace_ob_pr_ys(self, mi): + self.cdotr_ob_ys[mi-1] = self._get_dot(self.ob_y[mi-1], + self.ob_s[mi-1]) + self.cdotr_pr_ys[mi-1] = self._get_dot(self.pr_y[mi-1], + self.pr_s[mi-1]) + self.cn2_ob_y[mi-1] = self._get_norm(self.ob_y[mi-1]) + self.cn2_pr_y[mi-1] = self._get_norm(self.pr_y[mi-1]) + + def _get_ob_pr_sh(self, k): + ob_sh = self._get_dot(self.ob_s[k], self.ob_h) + pr_sh = self._get_dot(self.pr_s[k], self.pr_h) + return ob_sh, pr_sh + + def _get_ob_pr_yh(self, k): + ob_yh = self._get_dot(self.ob_y[k], self.ob_h) + pr_yh = self._get_dot(self.pr_y[k], self.pr_h) + return ob_yh, pr_yh + + def engine_iterate(self, num=1): + """ + Compute `num` iterations. + """ + error_dct = super().engine_iterate(num) + # copy all data back to cpu + self._set_pr_ob_ref_for_data(dev='cpu', container=None, sync_copy=True) + return error_dct + + def engine_finalize(self): + """ + Clear all GPU data, pinned memory, etc + """ + super().engine_finalize() + diff --git a/ptypy/custom/LBFGS_pycuda.py b/ptypy/custom/LBFGS_pycuda.py new file mode 100644 index 000000000..833e57ef6 --- /dev/null +++ b/ptypy/custom/LBFGS_pycuda.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +""" +Limited-memory BFGS reconstruction engine. + +TODO. + + * Implement other regularizers + +This file is part of the PTYPY package. + + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import numpy as np +from pycuda import gpuarray +import pycuda.driver as cuda +import pycuda.cumath +from pycuda.tools import DeviceMemoryPool + +from ptypy.engines import register +from ptypy.custom.LBFGS_serial import LBFGS_serial +from ptypy.accelerate.base.engines.ML_serial import ML_serial, BaseModelSerial +from ptypy.accelerate.cuda_pycuda.engines.ML_pycuda import ML_pycuda, GaussianModel +from ptypy import utils as u +from ptypy.utils.verbose import logger, log +from ptypy.utils import parallel +from ptypy.accelerate.cuda_pycuda import get_context +from ptypy.accelerate.cuda_pycuda.kernels import PropagationKernel, RealSupportKernel, FourierSupportKernel +from ptypy.accelerate.cuda_pycuda.kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel +from ptypy.accelerate.cuda_pycuda.array_utils import ArrayUtilsKernel, DerivativesKernel, GaussianSmoothingKernel, TransposeKernel + +from ptypy.accelerate.cuda_pycuda.mem_utils import GpuDataManager +from ptypy.accelerate.base import address_manglers + +__all__ = ['LBFGS_pycuda'] + +MAX_BLOCKS = 99999 # can be used to limit the number of blocks, simulating that they don't fit +#MAX_BLOCKS = 3 # can be used to limit the number of blocks, simulating that they don't fit + +@register() +class LBFGS_pycuda(LBFGS_serial, ML_pycuda): + + """ + Defaults: + + [probe_update_cuda_atomics] + default = False + type = bool + help = For GPU, use the atomics version for probe update kernel + + [object_update_cuda_atomics] + default = True + type = bool + help = For GPU, use the atomics version for object update kernel + + [use_cuda_device_memory_pool] + default = True + type = bool + help = For GPU, use a device memory pool + + [fft_lib] + default = reikna + type = str + help = Choose the pycuda-compatible FFT module. + doc = One of: + - ``'reikna'`` : the reikna packaga (fast load, competitive compute for streaming) + - ``'cuda'`` : ptypy's cuda wrapper (delayed load, but fastest compute if all data is on GPU) + - ``'skcuda'`` : scikit-cuda (fast load, slowest compute due to additional store/load stages) + choices = 'reikna','cuda','skcuda' + userlevel = 2 + """ + + def __init__(self, ptycho_parent, pars=None): + """ + Limited-memory BFGS reconstruction engine. + """ + super().__init__(ptycho_parent, pars) + + def engine_initialize(self): + """ + Prepare for LBFGS reconstruction. + """ + super().engine_initialize() + + def engine_prepare(self): + super().engine_prepare() + + def _initialize_model(self): + + # Create noise model + if self.p.ML_type.lower() == "gaussian": + self.ML_model = GaussianModel(self) + elif self.p.ML_type.lower() == "poisson": + raise NotImplementedError('Poisson norm model not yet implemented') + elif self.p.ML_type.lower() == "euclid": + raise NotImplementedError('Euclid norm model not yet implemented') + else: + raise RuntimeError("Unsupported ML_type: '%s'" % self.p.ML_type) + + def _get_smooth_gradient(self, data, sigma): + if self.p.smooth_gradient_method == "convolution": + if self.GSK.tmp is None: + self.GSK.tmp = gpuarray.empty(data.shape, dtype=np.complex64) + try: + self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp) + except MemoryError: + raise RuntimeError("Convolution kernel too large for direct convolution on GPU", + "Please reduce parameter smooth_gradient or set smooth_gradient_method='fft'.") + elif self.p.smooth_gradient_method == "fft": + self.FGSK.filter(data, sigma) + else: + raise NotImplementedError("smooth_gradient_method should be ```convolution``` or ```fft```.") + return data + + def _smooth_gradient(self): + new_ob_grad = self.ob_grad_new + # Smoothing preconditioner decay (once per iteration) + if self.smooth_gradient: + self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) + for name, s in new_ob_grad.storages.items(): + s.gpu = self._get_smooth_gradient(s.gpu, self.smooth_gradient.sigma) + + def _get_ob_norm(self): + norm = self._get_norm(self.ob_grad_new) + return norm + + def _get_pr_norm(self): + norm = self._get_norm(self.pr_grad_new) + return norm + + def _get_dot(self, c1, c2): + dot = np.double(0.) + for name, s2 in c2.storages.items(): + s1 = c1.storages[name] + dot += self._dot_kernel(s2.gpu, s1.gpu).get()[0] + return dot + + def _get_norm(self, c): + norm = np.double(0.) + for name, s in c.storages.items(): + norm += self._dot_kernel(s.gpu, s.gpu).get()[0] + return norm + + def _replace_ob_pr_ys(self, mi): + self.cdotr_ob_ys[mi-1] = self._get_dot(self.ob_y[mi-1], + self.ob_s[mi-1]) + self.cdotr_pr_ys[mi-1] = self._get_dot(self.pr_y[mi-1], + self.pr_s[mi-1]) + self.cn2_ob_y[mi-1] = self._get_norm(self.ob_y[mi-1]) + self.cn2_pr_y[mi-1] = self._get_norm(self.pr_y[mi-1]) + + def _get_ob_pr_sh(self, k): + ob_sh = self._get_dot(self.ob_s[k], self.ob_h) + pr_sh = self._get_dot(self.pr_s[k], self.pr_h) + return ob_sh, pr_sh + + def _get_ob_pr_yh(self, k): + ob_yh = self._get_dot(self.ob_y[k], self.ob_h) + pr_yh = self._get_dot(self.pr_y[k], self.pr_h) + return ob_yh, pr_yh + + def engine_iterate(self, num=1): + """ + Compute `num` iterations. + """ + error_dct = super().engine_iterate(num) + # copy all data back to cpu + self._set_pr_ob_ref_for_data(dev='cpu', container=None, sync_copy=True) + return error_dct + + def engine_finalize(self): + """ + Clear all GPU data, pinned memory, etc + """ + super().engine_finalize() diff --git a/ptypy/custom/LBFGS_serial.py b/ptypy/custom/LBFGS_serial.py new file mode 100644 index 000000000..27ac49bd7 --- /dev/null +++ b/ptypy/custom/LBFGS_serial.py @@ -0,0 +1,303 @@ +# -*- coding: utf-8 -*- +""" +Limited-memory BFGS reconstruction engine. + +TODO. + + * Implement other regularizers + +This file is part of the PTYPY package. + + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import numpy as np +import time + +from ptypy.custom.LBFGS import LBFGS +from ptypy.engines.ML import ML, BaseModel +# from .projectional_serial import serialize_array_access +from ptypy.accelerate.base.engines.ML_serial import ML_serial, GaussianModel +from ptypy.accelerate.base.engines.projectional_serial import serialize_array_access +from ptypy import utils as u +from ptypy.utils.verbose import logger, log +from ptypy.utils import parallel +from ptypy.engines.utils import Cnorm2, Cdot +from ptypy.engines import register +from ptypy.accelerate.base.kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel +from ptypy.accelerate.base import address_manglers + + +__all__ = ['LBFGS_serial'] + +@register() +class LBFGS_serial(LBFGS, ML_serial): + + def __init__(self, ptycho_parent, pars=None): + """ + Limited-memory BFGS reconstruction engine. + """ + super(LBFGS_serial, self).__init__(ptycho_parent, pars) + + self.cdotr_ob_ys = [0] * self.p.bfgs_memory_size + self.cdotr_pr_ys = [0] * self.p.bfgs_memory_size + self.cn2_ob_y = [0] * self.p.bfgs_memory_size + self.cn2_pr_y = [0] * self.p.bfgs_memory_size + + def engine_initialize(self): + """ + Prepare for reconstruction. + """ + super(LBFGS_serial, self).engine_initialize() + + def _initialize_model(self): + # Create noise model + if self.p.ML_type.lower() == "gaussian": + self.ML_model = GaussianModel(self) + elif self.p.ML_type.lower() == "poisson": + raise NotImplementedError('Poisson norm model not yet implemented') + elif self.p.ML_type.lower() == "euclid": + raise NotImplementedError('Euclid norm model not yet implemented') + else: + raise RuntimeError("Unsupported ML_type: '%s'" % self.p.ML_type) + + def engine_prepare(self): + super(LBFGS_serial, self).engine_prepare() + + def _get_smooth_gradient(self, data, sigma): + return self.smooth_gradient(data) + + def _smooth_gradient(self): + new_ob_grad = self.ob_grad_new + # Smoothing preconditioner decay (once per iteration) + if self.smooth_gradient: + self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) + for name, s in new_ob_grad.storages.items(): + s.data[:] = self._get_smooth_gradient(s.data, self.smooth_gradient.sigma) + + def _get_ob_norm(self): + norm = Cnorm2(self.ob_grad_new) + return norm + + def _get_pr_norm(self): + norm = Cnorm2(self.pr_grad_new) + return norm + + def _replace_ob_pr_ys(self, mi): + self.cdotr_ob_ys[mi-1] = np.real(Cdot(self.ob_y[mi-1], + self.ob_s[mi-1])) + self.cdotr_pr_ys[mi-1] = np.real(Cdot(self.pr_y[mi-1], + self.pr_s[mi-1])) + self.cn2_ob_y[mi-1] = Cnorm2(self.ob_y[mi-1]) + self.cn2_pr_y[mi-1] = Cnorm2(self.pr_y[mi-1]) + + def _get_ob_pr_sh(self, k): + ob_sh = np.real(Cdot(self.ob_s[k], self.ob_h)) + pr_sh = np.real(Cdot(self.pr_s[k], self.pr_h)) + return ob_sh, pr_sh + + def _get_ob_pr_yh(self, k): + ob_yh = np.real(Cdot(self.ob_y[k], self.ob_h)) + pr_yh = np.real(Cdot(self.pr_y[k], self.pr_h)) + return ob_yh, pr_yh + + def engine_iterate(self, num=1): + """ + Compute `num` iterations. + """ + ######################## + # Compute new gradient + ######################## + tg = 0. + tc = 0. + ta = time.time() + for it in range(num): + t1 = time.time() + error_dct = self.ML_model.new_grad() + new_ob_grad, new_pr_grad = self.ob_grad_new, self.pr_grad_new + tg += time.time() - t1 + + if self.p.probe_update_start <= self.curiter: + # Apply probe support if needed + for name, s in new_pr_grad.storages.items(): + self.support_constraint(s) + #support = self.probe_support.get(name) + #if support is not None: + # s.data *= support + else: + new_pr_grad.fill(0.) + + # probe/object rescaling (not used for now) + if self.p.scale_precond: + cn2_new_pr_grad = self._get_pr_norm() + cn2_new_ob_grad = self._get_ob_norm() + + if cn2_new_pr_grad > 1e-5: + scale_p_o = (self.p.scale_probe_object * cn2_new_ob_grad + / cn2_new_pr_grad) + else: + scale_p_o = self.p.scale_probe_object + if self.scale_p_o is None: + self.scale_p_o = scale_p_o + else: + self.scale_p_o = self.scale_p_o ** self.scale_p_o_memory + self.scale_p_o *= scale_p_o ** (1-self.scale_p_o_memory) + logger.debug('Scale P/O: %6.3g' % scale_p_o) + else: + self.scale_p_o = self.p.scale_probe_object + + ############################ + # LBFGS Two Loop Recursion + ############################ + if self.curiter == 0: # Initial steepest-descent step + + # Wavefield preconditioner (applied twice) + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + new_ob_grad.storages[name].data /= self.ob_fln.storages[name].data + self.p.wavefield_delta_object + new_pr_grad.storages[name].data /= self.pr_fln.storages[name].data + self.p.wavefield_delta_probe + + # Object steepest-descent step + self.ob_h -= new_ob_grad + + # Probe steepest-descent step + new_pr_grad *= self.scale_p_o # probe preconditioning (applied twice) + self.pr_h -= new_pr_grad + + else: # Two-loop LBFGS recursion + + # Wavefield preconditioner + if self.p.wavefield_precond: + for name, s in self.ob_h.storages.items(): + self.ob_h.storages[name].data *= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + self.pr_h.storages[name].data *= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + for name, s in new_ob_grad.storages.items(): + new_ob_grad.storages[name].data /= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + new_pr_grad.storages[name].data /= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + + # Memory index + mi = min(self.curiter, self.p.bfgs_memory_size) + + # Remember last object update and gradient difference + self.ob_s[mi-1] << self.ob_h + self.ob_y[mi-1] << new_ob_grad + self.ob_y[mi-1] -= self.ob_grad + + # Remember last probe update and gradient difference + self.pr_h /= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_s[mi-1] << self.pr_h + new_pr_grad *= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_y[mi-1] << new_pr_grad + self.pr_y[mi-1] -= self.pr_grad + + # Compute and store rho + self._replace_ob_pr_ys(mi) + self.rho[mi-1] = 1. / (self.cdotr_ob_ys[mi-1] + + self.cdotr_pr_ys[mi-1]) + + # BFGS update + self.ob_h << new_ob_grad + self.pr_h << new_pr_grad + # Compute right-hand side of BGFS product + for i in reversed(range(mi)): + ob_sh, pr_sh = self._get_ob_pr_sh(i) + self.alpha[i] = self.rho[i]*(ob_sh + pr_sh) + + #TODO: support operand * for 'float' and 'Container' + # (reusing self.ob_grad here is not efficient) + # self.ob_h -= self.alpha[i]*self.ob_y[i] + self.ob_grad << self.ob_y[i] + self.ob_grad *= self.alpha[i] + self.ob_h -= self.ob_grad + #TODO: support operand * for 'float' and 'Container' + # (reusing self.pr_grad here is not efficient) + # self.pr_h -= self.alpha[i]*self.pr_y[i] + self.pr_grad << self.pr_y[i] + self.pr_grad *= self.alpha[i] + self.pr_h -= self.pr_grad + + # Compute centre of BFGS product (scaled identity) + c_num = self.cdotr_ob_ys[mi-1] + self.cdotr_pr_ys[mi-1] + c_denom = self.cn2_ob_y[mi-1] + self.cn2_pr_y[mi-1] + self.ob_h *= (c_num/c_denom) + self.pr_h *= (c_num/c_denom) + + + # Compute left-hand side of BFGS product + for i in range(mi): + ob_yh, pr_yh = self._get_ob_pr_yh(i) + beta = self.rho[i]*(ob_yh + pr_yh) + + #TODO: support operand * for 'float' and 'Container' + # (reusing self.ob_grad here is not efficient) + # self.ob_h += (self.alpha[i]-beta)*self.ob_s[i] + self.ob_grad << self.ob_s[i] + self.ob_grad *= (self.alpha[i]-beta) + self.ob_h += self.ob_grad + + #TODO: support operand * for 'float' and 'Container' + # (reusing self.pr_grad here is not efficient) + # self.pr_h += (self.alpha[i]-beta)*self.pr_s[i] + self.pr_grad << self.pr_s[i] + self.pr_grad *= (self.alpha[i]-beta) + self.pr_h += self.pr_grad + + # Wavefield preconditioner + if self.p.wavefield_precond: + for name, s in self.ob_h.storages.items(): + self.ob_h.storages[name].data /= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + self.pr_h.storages[name].data /= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + + # Flip step direction for minimisation + self.ob_h *= -1 + self.pr_h *= np.sqrt(self.scale_p_o) # probe preconditioning + self.pr_h *= -1 + + # update current gradients with new gradients + self.ob_grad << new_ob_grad + self.pr_grad << new_pr_grad + + # linesearch (same as ML) + t2 = time.time() + B = self.ML_model.poly_line_coeffs(self.ob_h, self.pr_h) + tc += time.time() - t2 + + + if np.isinf(B).any() or np.isnan(B).any(): + logger.warning( + 'Warning! inf or nan found! Trying to continue...') + B[np.isinf(B)] = 0. + B[np.isnan(B)] = 0. + + dt = self.ptycho.FType + self.tmin = dt(-.5 * B[1] / B[2]) + + # step update + self.ob_h *= self.tmin + self.pr_h *= self.tmin + self.ob += self.ob_h + self.pr += self.pr_h + + # Roll memory for overwriting + if self.curiter >= self.p.bfgs_memory_size: + self.ob_s.append(self.ob_s.pop(0)) + self.pr_s.append(self.pr_s.pop(0)) + self.ob_y.append(self.ob_y.pop(0)) + self.pr_y.append(self.pr_y.pop(0)) + self.rho = np.roll(self.rho,-1) + + # Position correction + self.position_update() + + # Allow for customized modifications at the end of each iteration + self._post_iterate_update() + + # increase iteration counter + self.curiter +=1 + + logger.info('Time spent in gradient calculation: %.2f' % tg) + logger.info(' .... in coefficient calculation: %.2f' % tc) + return error_dct # np.array([[self.ML_model.LL[0]] * 3]) + + def engine_finalize(self): + super(LBFGS_serial, self).engine_finalize() diff --git a/templates/engines/cupy/moonflower_LBFGS_cupy.py b/templates/engines/cupy/moonflower_LBFGS_cupy.py new file mode 100644 index 000000000..8cc235756 --- /dev/null +++ b/templates/engines/cupy/moonflower_LBFGS_cupy.py @@ -0,0 +1,62 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="cupy") +from ptypy.custom import LBFGS_cupy + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 400 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 100 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS_cupy' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.reg_del2 = True # Whether to use a Gaussian prior (smoothing) regularizer +p.engines.engine00.reg_del2_amplitude = 1. # Amplitude of the Gaussian prior if used +p.engines.engine00.scale_precond = True +p.engines.engine00.floating_intensities = False + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/cupy/moonflower_LBFGS_cupy_wavefield.py b/templates/engines/cupy/moonflower_LBFGS_cupy_wavefield.py new file mode 100644 index 000000000..3273e715c --- /dev/null +++ b/templates/engines/cupy/moonflower_LBFGS_cupy_wavefield.py @@ -0,0 +1,59 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="cupy") +from ptypy.custom import LBFGS_cupy + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 400 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 100 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS_cupy' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/numpy/moonflower_LBFGS_Gaussian.py b/templates/engines/numpy/moonflower_LBFGS_Gaussian.py new file mode 100644 index 000000000..995bce464 --- /dev/null +++ b/templates/engines/numpy/moonflower_LBFGS_Gaussian.py @@ -0,0 +1,61 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" +#import ptypy +from ptypy.core import Ptycho +from ptypy import utils as u +from ptypy.custom import LBFGS + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" + +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) + +# saving intermediate results +p.io.autosave = u.Param(active=False) + +# opens plotting GUI if interaction set to active) +p.io.autoplot = u.Param(active=True) +p.io.interaction = u.Param(active=True) + +# max 100 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS' +p.engines.engine00.ML_type = 'Gaussian' +p.engines.engine00.reg_del2 = True # Whether to use a Gaussian prior (smoothing) regularizer +p.engines.engine00.reg_del2_amplitude = 1. # Amplitude of the Gaussian prior if used +p.engines.engine00.scale_precond = True +#p.engines.engine00.scale_probe_object = 1. +p.engines.engine00.floating_intensities = False +p.engines.engine00.numiter = 300 + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/numpy/moonflower_LBFGS_Gaussian_wavefield.py b/templates/engines/numpy/moonflower_LBFGS_Gaussian_wavefield.py new file mode 100644 index 000000000..13c28a536 --- /dev/null +++ b/templates/engines/numpy/moonflower_LBFGS_Gaussian_wavefield.py @@ -0,0 +1,57 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" +#import ptypy +from ptypy.core import Ptycho +from ptypy import utils as u +from ptypy.custom import LBFGS + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" + +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) + +# saving intermediate results +p.io.autosave = u.Param(active=False) + +# opens plotting GUI if interaction set to active) +p.io.autoplot = u.Param(active=True) +p.io.interaction = u.Param(active=True) + +# max 100 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS' +p.engines.engine00.ML_type = 'Gaussian' +p.engines.engine00.numiter = 300 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/pycuda/moonflower_LBFGS_pycuda.py b/templates/engines/pycuda/moonflower_LBFGS_pycuda.py new file mode 100644 index 000000000..2e4ea2828 --- /dev/null +++ b/templates/engines/pycuda/moonflower_LBFGS_pycuda.py @@ -0,0 +1,62 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="cuda") +from ptypy.custom import LBFGS_pycuda + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 400 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 100 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS_pycuda' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.reg_del2 = True # Whether to use a Gaussian prior (smoothing) regularizer +p.engines.engine00.reg_del2_amplitude = 1. # Amplitude of the Gaussian prior if used +p.engines.engine00.scale_precond = True +p.engines.engine00.floating_intensities = False + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/pycuda/moonflower_LBFGS_pycuda_wavefield.py b/templates/engines/pycuda/moonflower_LBFGS_pycuda_wavefield.py new file mode 100644 index 000000000..eff11bf01 --- /dev/null +++ b/templates/engines/pycuda/moonflower_LBFGS_pycuda_wavefield.py @@ -0,0 +1,59 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="cuda") +from ptypy.custom import LBFGS_pycuda + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 400 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 100 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'LBFGS_pycuda' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/notebooks/moonflower_lbfgs.ipynb b/templates/notebooks/moonflower_lbfgs.ipynb new file mode 100644 index 000000000..dffa0b537 --- /dev/null +++ b/templates/notebooks/moonflower_lbfgs.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PtyPy moonflower example\n", + "#### scan model: BlockGradFull\n", + "#### engine: Limited-memory BFGS (LBFGS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "from ptypy.core import Ptycho\n", + "from ptypy import utils as u\n", + "from ptypy.custom import LBFGS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# create parameter tree\n", + "p = u.Param()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# set verbose level to interactive\n", + "p.verbose_level = \"interactive\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# set home path and io settings (no files saved)\n", + "p.io = u.Param()\n", + "p.io.rfile = None\n", + "p.io.autosave = u.Param(active=False)\n", + "p.io.autoplot = u.Param(active=False)\n", + "p.io.interaction = u.Param(active=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# max 200 frames (128x128px) of diffraction data\n", + "p.scans = u.Param()\n", + "p.scans.MF = u.Param()\n", + "p.scans.MF.name = 'BlockGradFull'\n", + "p.scans.MF.data= u.Param()\n", + "p.scans.MF.data.name = 'MoonFlowerScan'\n", + "p.scans.MF.data.shape = 128\n", + "p.scans.MF.data.num_frames = 200\n", + "p.scans.MF.data.save = None\n", + "p.scans.MF.data.density = 0.2\n", + "p.scans.MF.data.photons = 1e8\n", + "p.scans.MF.data.psf = 0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# maximum likelihood reconstrucion engine\n", + "p.engines = u.Param()\n", + "p.engines.engine00 = u.Param()\n", + "p.engines.engine00.name = 'LBFGS'\n", + "p.engines.engine00.ML_type = 'Gaussian'\n", + "p.engines.engine00.reg_del2 = True \n", + "p.engines.engine00.reg_del2_amplitude = 1. \n", + "p.engines.engine00.scale_precond = True\n", + "p.engines.engine00.floating_intensities = False\n", + "p.engines.engine00.numiter = 300" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# prepare and run\n", + "P = Ptycho(p,level=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plotting the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "import ptypy.utils.plot_client as pc\n", + "fig = pc.figure_from_ptycho(P)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/templates/notebooks/moonflower_lbfgs_pycuda.ipynb b/templates/notebooks/moonflower_lbfgs_pycuda.ipynb new file mode 100644 index 000000000..a2a03f3d6 --- /dev/null +++ b/templates/notebooks/moonflower_lbfgs_pycuda.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PtyPy moonflower example\n", + "#### scan model: BlockGradFull\n", + "#### engine: Limited-memory BFGS (LBFGS) with GPU acceleration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "import ptypy\n", + "from ptypy.core import Ptycho\n", + "from ptypy import utils as u" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# Import pycuda engines (DM, RAAR)\n", + "ptypy.load_gpu_engines(\"cuda\")\n", + "from ptypy.custom import LBFGS_pycuda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# create parameter tree\n", + "p = u.Param()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# set verbose level to interactive\n", + "p.verbose_level = \"interactive\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# Nr. of frames in a block\n", + "p.frames_per_block = 20" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# set home path and io settings (no files saved)\n", + "p.io = u.Param()\n", + "p.io.rfile = None\n", + "p.io.autosave = u.Param(active=False)\n", + "p.io.autoplot = u.Param(active=False)\n", + "p.io.interaction = u.Param(active=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# max 200 frames (128x128px) of diffraction data\n", + "p.scans = u.Param()\n", + "p.scans.MF = u.Param()\n", + "p.scans.MF.name = 'BlockGradFull'\n", + "p.scans.MF.data= u.Param()\n", + "p.scans.MF.data.name = 'MoonFlowerScan'\n", + "p.scans.MF.data.shape = 128\n", + "p.scans.MF.data.num_frames = 200\n", + "p.scans.MF.data.save = None\n", + "p.scans.MF.data.density = 0.2\n", + "p.scans.MF.data.photons = 1e8\n", + "p.scans.MF.data.psf = 0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# maximum likelihood reconstrucion engine\n", + "p.engines = u.Param()\n", + "p.engines.engine00 = u.Param()\n", + "p.engines.engine00.name = 'LBFGS_pycuda'\n", + "p.engines.engine00.ML_type = 'Gaussian'\n", + "p.engines.engine00.reg_del2 = True \n", + "p.engines.engine00.reg_del2_amplitude = 1. \n", + "p.engines.engine00.scale_precond = True\n", + "p.engines.engine00.floating_intensities = False\n", + "p.engines.engine00.numiter = 300" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# prepare and run\n", + "P = Ptycho(p,level=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plotting the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "import ptypy.utils.plot_client as pc\n", + "fig = pc.figure_from_ptycho(P)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/accelerate_tests/base_tests/LBFGS_test.py b/test/accelerate_tests/base_tests/LBFGS_test.py new file mode 100644 index 000000000..8f679b1a6 --- /dev/null +++ b/test/accelerate_tests/base_tests/LBFGS_test.py @@ -0,0 +1,192 @@ +""" +Test for the LBFGS engine. + +This file is part of the PTYPY package. + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import unittest + +from test import utils as tu +from ptypy import utils as u +import ptypy +from ptypy.custom import LBFGS, LBFGS_serial +import tempfile +import shutil +import numpy as np +import pytest + +class LBFGSSerialTest(unittest.TestCase): + + def setUp(self): + self.outpath = tempfile.mkdtemp(suffix="LBFGS_serial_test") + + def tearDown(self): + shutil.rmtree(self.outpath) + + def check_engine_output(self, output, plotting=False, debug=False, tol=0.1): + P_LBFGS, P_LBFGS_serial = output + numiter = len(P_LBFGS.runtime["iter_info"]) + LL_LBFGS = np.array([P_LBFGS.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + LL_LBFGS_serial = np.array([P_LBFGS_serial.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + crop = 42 + OBJ_LBFGS_serial, OBJ_LBFGS = P_LBFGS_serial.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop], P_LBFGS.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop] + PRB_LBFGS_serial, PRB_LBFGS = P_LBFGS_serial.probe.S["SMFG00"].data[0], P_LBFGS.probe.S["SMFG00"].data[0] + eng_LBFGS = P_LBFGS.engines["engine00"] + eng_LBFGS_serial = P_LBFGS_serial.engines["engine00"] + if debug: + import matplotlib.pyplot as plt + plt.figure("LBFGS debug") + plt.imshow(np.abs(eng_LBFGS.debug)) + plt.figure("LBFGS serial debug") + plt.imshow(np.abs(eng_LBFGS_serial.debug)) + plt.show() + + if plotting: + import matplotlib.pyplot as plt + plt.figure("Errors") + plt.plot(LL_LBFGS, label="LBFGS") + plt.plot(LL_LBFGS_serial, label="LBFGS_serial") + plt.legend() + plt.show() + plt.figure("Phase LBFGS") + plt.imshow(np.angle(OBJ_LBFGS)) + plt.figure("Ampltitude LBFGS") + plt.imshow(np.abs(OBJ_LBFGS)) + plt.figure("Phase LBFGS serial") + plt.imshow(np.angle(OBJ_LBFGS_serial)) + plt.figure("Amplitude LBFGS serial") + plt.imshow(np.abs(OBJ_LBFGS_serial)) + plt.figure("Phase difference") + plt.imshow(np.angle(OBJ_LBFGS_serial) - np.angle(OBJ_LBFGS), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.figure("Amplitude difference") + plt.imshow(np.abs(OBJ_LBFGS_serial) - np.abs(OBJ_LBFGS), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.show() + # np.testing.assert_allclose(eng_LBFGS.debug, eng_LBFGS_serial.debug, atol=1e-7, rtol=1e-7, + # err_msg="The debug arrays are not matching as expected") + RMSE_ob = (np.mean(np.abs(OBJ_LBFGS_serial - OBJ_LBFGS)**2)) + RMSE_pr = (np.mean(np.abs(PRB_LBFGS_serial - PRB_LBFGS)**2)) + # RMSE_LL = (np.mean(np.abs(LL_LBFGS_serial - LL_LBFGS)**2)) + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, + err_msg="The object arrays are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") + # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, + # err_msg="The log-likelihood errors are not matching as expected") + + + def test_LBFGS_serial_base(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_serial_regularizer(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 90 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False, tol=0.2) + + def test_LBFGS_serial_preconditioner(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_serial_floating(self): + out = [] + # fail at iter num 80 + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = True + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_serial_smoothing_regularizer(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + @pytest.mark.skip(reason="Funny behaviour with this test, most likely related to Gaussian filter, see issue #607") + def test_LBFGS_serial_wavefield_preconditioner(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 500 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0. + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) + + def test_LBFGS_serial_all(self): + out = [] + for eng in ["LBFGS", "LBFGS_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + +if __name__ == "__main__": + unittest.main() diff --git a/test/accelerate_tests/cuda_cupy_tests/LBFGS_test.py b/test/accelerate_tests/cuda_cupy_tests/LBFGS_test.py new file mode 100644 index 000000000..902b62095 --- /dev/null +++ b/test/accelerate_tests/cuda_cupy_tests/LBFGS_test.py @@ -0,0 +1,190 @@ +""" +Test for the LBFGS engine. + +This file is part of the PTYPY package. + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import unittest + +from test import utils as tu +from ptypy import utils as u +import ptypy +from ptypy.custom import LBFGS_serial, LBFGS_cupy +import tempfile +import shutil +import numpy as np + +class LBFGSCupyTest(unittest.TestCase): + + def setUp(self): + self.outpath = tempfile.mkdtemp(suffix="LBFGS_cupy_test") + + def tearDown(self): + shutil.rmtree(self.outpath) + + def check_engine_output(self, output, plotting=False, debug=False, tol=0.1): + P_LBFGS_serial, P_LBFGS_cupy = output + numiter = len(P_LBFGS_serial.runtime["iter_info"]) + LL_LBFGS_serial = np.array([P_LBFGS_serial.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + LL_LBFGS_cupy = np.array([P_LBFGS_cupy.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + crop = 42 + OBJ_LBFGS_cupy, OBJ_LBFGS_serial = P_LBFGS_cupy.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop], P_LBFGS_serial.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop] + PRB_LBFGS_cupy, PRB_LBFGS_serial = P_LBFGS_cupy.probe.S["SMFG00"].data[0], P_LBFGS_serial.probe.S["SMFG00"].data[0] + eng_LBFGS_serial = P_LBFGS_serial.engines["engine00"] + eng_LBFGS_cupy = P_LBFGS_cupy.engines["engine00"] + if debug: + import matplotlib.pyplot as plt + plt.figure("LBFGS serial debug") + plt.imshow(np.abs(eng_LBFGS_serial.debug)) + plt.figure("LBFGS cupy debug") + plt.imshow(np.abs(eng_LBFGS_cupy.debug)) + plt.show() + + if plotting: + import matplotlib.pyplot as plt + plt.figure("Errors") + plt.plot(LL_LBFGS_serial, label="LBFGS_serial") + plt.plot(LL_LBFGS_cupy, label="LBFGS_cupy") + plt.legend() + plt.show() + plt.figure("Phase LBFGS serial") + plt.imshow(np.angle(OBJ_LBFGS_serial)) + plt.figure("Ampltitude LBFGS serial") + plt.imshow(np.abs(OBJ_LBFGS_serial)) + plt.figure("Phase LBFGS cupy") + plt.imshow(np.angle(OBJ_LBFGS_cupy)) + plt.figure("Amplitude LBFGS cupy") + plt.imshow(np.abs(OBJ_LBFGS_cupy)) + plt.figure("Phase difference") + plt.imshow(np.angle(OBJ_LBFGS_cupy) - np.angle(OBJ_LBFGS_serial), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.figure("Amplitude difference") + plt.imshow(np.abs(OBJ_LBFGS_cupy) - np.abs(OBJ_LBFGS_serial), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.show() + # np.testing.assert_allclose(eng_LBFGS.debug, eng_LBFGS_serial.debug, atol=1e-7, rtol=1e-7, + # err_msg="The debug arrays are not matching as expected") + RMSE_ob = (np.mean(np.abs(OBJ_LBFGS_cupy - OBJ_LBFGS_serial)**2)) + RMSE_pr = (np.mean(np.abs(PRB_LBFGS_cupy - PRB_LBFGS_serial)**2)) + # RMSE_LL = (np.mean(np.abs(LL_LBFGS_cupy - LL_LBFGS_serial)**2)) + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, + err_msg="The object arrays are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") + # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, + # err_msg="The log-likelihood errors are not matching as expected") + + + def test_LBFGS_cupy_base(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_cupy_regularizer(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 90 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_cupy_preconditioner(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_cupy_floating(self): + out = [] + # fail at iter num 80 + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = True + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_cupy_smoothing_regularizer(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_cupy_wavefield_preconditioner(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 500 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0 + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) + + def test_LBFGS_cupy_all(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + +if __name__ == "__main__": + unittest.main() diff --git a/test/accelerate_tests/cuda_pycuda_tests/LBFGS_test.py b/test/accelerate_tests/cuda_pycuda_tests/LBFGS_test.py new file mode 100644 index 000000000..f5a077d2a --- /dev/null +++ b/test/accelerate_tests/cuda_pycuda_tests/LBFGS_test.py @@ -0,0 +1,190 @@ +""" +Test for the LBFGS engine. + +This file is part of the PTYPY package. + :copyright: Copyright 2014 by the PTYPY team, see AUTHORS. + :license: GPLv2, see LICENSE for details. +""" +import unittest + +from test import utils as tu +from ptypy import utils as u +import ptypy +from ptypy.custom import LBFGS_serial, LBFGS_pycuda +import tempfile +import shutil +import numpy as np + +class LBFGSPycudaTest(unittest.TestCase): + + def setUp(self): + self.outpath = tempfile.mkdtemp(suffix="LBFGS_pycuda_test") + + def tearDown(self): + shutil.rmtree(self.outpath) + + def check_engine_output(self, output, plotting=False, debug=False, tol=0.1): + P_LBFGS_serial, P_LBFGS_pycuda = output + numiter = len(P_LBFGS_serial.runtime["iter_info"]) + LL_LBFGS_serial = np.array([P_LBFGS_serial.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + LL_LBFGS_pycuda = np.array([P_LBFGS_pycuda.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) + crop = 42 + OBJ_LBFGS_pycuda, OBJ_LBFGS_serial = P_LBFGS_pycuda.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop], P_LBFGS_serial.obj.S["SMFG00"].data[0,crop:-crop,crop:-crop] + PRB_LBFGS_pycuda, PRB_LBFGS_serial = P_LBFGS_pycuda.probe.S["SMFG00"].data[0], P_LBFGS_serial.probe.S["SMFG00"].data[0] + eng_LBFGS_serial = P_LBFGS_serial.engines["engine00"] + eng_LBFGS_pycuda = P_LBFGS_pycuda.engines["engine00"] + if debug: + import matplotlib.pyplot as plt + plt.figure("LBFGS serial debug") + plt.imshow(np.abs(eng_LBFGS_serial.debug)) + plt.figure("LBFGS pycuda debug") + plt.imshow(np.abs(eng_LBFGS_pycuda.debug)) + plt.show() + + if plotting: + import matplotlib.pyplot as plt + plt.figure("Errors") + plt.plot(LL_LBFGS_serial, label="LBFGS_serial") + plt.plot(LL_LBFGS_pycuda, label="LBFGS_pycuda") + plt.legend() + plt.show() + plt.figure("Phase LBFGS serial") + plt.imshow(np.angle(OBJ_LBFGS_serial)) + plt.figure("Ampltitude LBFGS serial") + plt.imshow(np.abs(OBJ_LBFGS_serial)) + plt.figure("Phase LBFGS pycuda") + plt.imshow(np.angle(OBJ_LBFGS_pycuda)) + plt.figure("Amplitude LBFGS pycuda") + plt.imshow(np.abs(OBJ_LBFGS_pycuda)) + plt.figure("Phase difference") + plt.imshow(np.angle(OBJ_LBFGS_pycuda) - np.angle(OBJ_LBFGS_serial), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.figure("Amplitude difference") + plt.imshow(np.abs(OBJ_LBFGS_pycuda) - np.abs(OBJ_LBFGS_serial), vmin=-0.1, vmax=0.1) + plt.colorbar() + plt.show() + # np.testing.assert_allclose(eng_LBFGS.debug, eng_LBFGS_serial.debug, atol=1e-7, rtol=1e-7, + # err_msg="The debug arrays are not matching as expected") + RMSE_ob = (np.mean(np.abs(OBJ_LBFGS_pycuda - OBJ_LBFGS_serial)**2)) + RMSE_pr = (np.mean(np.abs(PRB_LBFGS_pycuda - PRB_LBFGS_serial)**2)) + # RMSE_LL = (np.mean(np.abs(LL_LBFGS_pycuda - LL_LBFGS_serial)**2)) + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, + err_msg="The object arrays are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") + # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, + # err_msg="The log-likelihood errors are not matching as expected") + + + def test_LBFGS_pycuda_base(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_pycuda_regularizer(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 90 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_pycuda_preconditioner(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_pycuda_floating(self): + out = [] + # fail at iter num 80 + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = True + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_pycuda_smoothing_regularizer(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = False + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + + def test_LBFGS_pycuda_wavefield_preconditioner(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 700 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0 + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) + + def test_LBFGS_pycuda_all(self): + out = [] + for eng in ["LBFGS_serial", "LBFGS_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = True + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 20 + engine_params.smooth_gradient_decay = 1/10. + engine_params.scale_precond = True + engine_params.scale_probe_object = 1e-6 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + +if __name__ == "__main__": + unittest.main()