From 0c78215c8190283c51b25706fcaf284d8b03917c Mon Sep 17 00:00:00 2001 From: Timothy Poon <62692924+ptim0626@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:38:21 +0000 Subject: [PATCH 01/12] Drop Python <= 3.8 support and add 3.13 (#588) * Drop Python <= 3.8 support * Add py 3.13 in CI tests * Use conda-incubator to set up conda env * Ensure correct activation throughout the tests * Migrate to importlib in script2rst * Migrate to importlib in resources * Migrate to packaging.version --- .github/workflows/test.yml | 32 ++++++++----------- dependencies_dev.yml | 1 + dependencies_full.yml | 1 + doc/script2rst.py | 30 ++++++++--------- ptypy/accelerate/cuda_cupy/multi_gpu.py | 8 ++--- ptypy/accelerate/cuda_pycuda/multi_gpu.py | 10 +++--- ptypy/resources/__init__.py | 14 ++++---- pyproject.toml | 6 ++-- .../cuda_cupy_tests/multi_gpu_test.py | 10 +++--- .../cuda_pycuda_tests/multi_gpu_test.py | 6 ++-- 10 files changed, 58 insertions(+), 60 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d53a9a632..b37bc8c30 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,16 +20,19 @@ on: jobs: build-linux: runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} strategy: max-parallel: 10 fail-fast: false matrix: python-version: - - "3.8" - "3.9" - "3.10" - "3.11" - "3.12" + - "3.13" conda-env: - "core" - "full" @@ -37,20 +40,14 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + - uses: conda-incubator/setup-miniconda@v3 with: - python-version: ${{ matrix.python-version }} - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - conda --version - conda info - - name: Make sure conda is updated - run: | - conda update conda - conda --version + auto-update-conda: true + auto-activate-base: false + conda-remove-defaults: true + channels: conda-forge + activate-environment: ptypy_env + python-version: ${{ matrix.python-version }} - name: Install ${{ matrix.conda-env }} dependencies run: | # replace python version in dependencies @@ -58,16 +55,13 @@ jobs: if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.12' ]; then sed -i '/- pyfftw/d' dependencies_${{ matrix.conda-env }}.yml fi - # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.8' ]; then - # sed -i '/- mpi4py/d' dependencies_${{ matrix.conda-env }}.yml - # fi # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.9' ]; then # sed -i '/- mpi4py/d' dependencies_${{ matrix.conda-env }}.yml # fi conda install --solver=classic mpich - conda env update --file dependencies_${{ matrix.conda-env }}.yml --name base + conda env update --file dependencies_${{ matrix.conda-env }}.yml --name ptypy_env conda install --solver=classic flake8 pytest pytest-cov - conda list + conda list - name: Prepare ptypy run: | # Install ptypy diff --git a/dependencies_dev.yml b/dependencies_dev.yml index 5462c145a..6563f9dd8 100644 --- a/dependencies_dev.yml +++ b/dependencies_dev.yml @@ -10,6 +10,7 @@ dependencies: - pyzmq - pep8 - mpi4py + - packaging - pillow - pyfftw - pip diff --git a/dependencies_full.yml b/dependencies_full.yml index e43241fce..205bdb19e 100644 --- a/dependencies_full.yml +++ b/dependencies_full.yml @@ -9,6 +9,7 @@ dependencies: - h5py - pyzmq - mpi4py + - packaging - pillow - pyfftw - pyyaml diff --git a/doc/script2rst.py b/doc/script2rst.py index b05148e46..b30d94667 100644 --- a/doc/script2rst.py +++ b/doc/script2rst.py @@ -1,5 +1,6 @@ import sys import io +from importlib.resources import files import contextlib import os @@ -9,13 +10,12 @@ 'simupod.py', 'ownengine.py', 'subclassptyscan.py'] +_ptypy_dir = files('ptypy') if len(sys.argv) == 1: - import pkg_resources - for script in scripts: - scr = pkg_resources.resource_filename('ptypy', tutorial_dir+script) - if not os.path.exists(scr): + scr = _ptypy_dir / (tutorial_dir + script) + if not scr.exists(): print('Using backup tutorial for %s' % script) scr = '../tutorial/'+script #subprocess.call(['python',sys.argv[0]+' '+scr]) # doesn't work @@ -50,13 +50,13 @@ def stdoutIO(stdout=None): frst.write(""" .. note:: This tutorial was generated from the python source - :file:`[ptypy_root]/tutorial/%(fname)s` using :file:`ptypy/doc/%(this)s`. + :file:`[ptypy_root]/tutorial/%(fname)s` using :file:`ptypy/doc/%(this)s`. You are encouraged to modify the parameters and rerun the tutorial with:: - + $ python [ptypy_root]/tutorial/%(fname)s """ % {'fname': os.path.split(script_name)[-1], 'this': sys.argv[0]}) - + was_comment = True while True: @@ -86,7 +86,7 @@ def stdoutIO(stdout=None): frst.write(' '+line2[1:].strip()+'\n') frst.write('\n') continue - + if line.startswith('"""'): frst.write('.. parsed-literal::\n\n') while True: @@ -95,11 +95,11 @@ def stdoutIO(stdout=None): break frst.write(' ' + line2) continue - + decorator = False indent = False for key in indent_keys: - if line.startswith(key): + if line.startswith(key): indent = True break @@ -125,12 +125,12 @@ def stdoutIO(stdout=None): pt = fpy.tell() exec(func+'\n') continue - + wline = line.strip() if not wline: frst.write('\n') continue - + with stdoutIO() as sout: exec(wline) out = sout.getvalue() @@ -150,9 +150,9 @@ def stdoutIO(stdout=None): if was_comment: wline = '\n::\n\n'+wline was_comment = False - + frst.write(wline+'\n') - + #print out if out.strip(): print(out) @@ -160,5 +160,5 @@ def stdoutIO(stdout=None): frst.write(' '*3+l+'\n') out = '' - + diff --git a/ptypy/accelerate/cuda_cupy/multi_gpu.py b/ptypy/accelerate/cuda_cupy/multi_gpu.py index 6ccbce9a7..e631c3405 100644 --- a/ptypy/accelerate/cuda_cupy/multi_gpu.py +++ b/ptypy/accelerate/cuda_cupy/multi_gpu.py @@ -10,12 +10,12 @@ - OpenMPI in a conda install needs to have the environment variable --> if cuda support isn't enabled, the application simply crashes with a seg fault -2) For NCCL peer-to-peer transfers, the EXCLUSIVE compute mode cannot be used. +2) For NCCL peer-to-peer transfers, the EXCLUSIVE compute mode cannot be used. It should be in DEFAULT mode. """ -from pkg_resources import parse_version +from packaging.version import parse import numpy as np import cupy as cp from ptypy.utils import parallel @@ -44,7 +44,7 @@ have_cuda_mpi = (mpi4py is not None) and \ "OMPI_MCA_opal_cuda_support" in os.environ and \ os.environ["OMPI_MCA_opal_cuda_support"] == "true" and \ - parse_version(parse_version(mpi4py.__version__).base_version) >= parse_version("3.1.0") and \ + parse(parse(mpi4py.__version__).base_version) >= parse("3.1.0") and \ not ('PTYPY_USE_MPI' in os.environ) @@ -114,7 +114,7 @@ def allReduceSum(self, arr): count, datatype = self.__get_NCCL_count_dtype(arr) - self.com.allReduce(arr.data.ptr, arr.data.ptr, count, datatype, nccl.NCCL_SUM, + self.com.allReduce(arr.data.ptr, arr.data.ptr, count, datatype, nccl.NCCL_SUM, cp.cuda.get_current_stream().ptr) def __get_NCCL_count_dtype(self, arr): diff --git a/ptypy/accelerate/cuda_pycuda/multi_gpu.py b/ptypy/accelerate/cuda_pycuda/multi_gpu.py index 73138c3ee..09ec60552 100644 --- a/ptypy/accelerate/cuda_pycuda/multi_gpu.py +++ b/ptypy/accelerate/cuda_pycuda/multi_gpu.py @@ -5,7 +5,7 @@ Findings: -1) NCCL works with unit tests, but not in the engines. It seems to +1) NCCL works with unit tests, but not in the engines. It seems to add something to the existing pycuda Context or create a new one, as a later event recording on an exit wave transfer fails with 'ivalid resource handle' Cuda Error. This error typically happens if for example @@ -22,14 +22,14 @@ - OpenMPI in a conda install needs to have the environment variable --> if cuda support isn't enabled, the application simply crashes with a seg fault -4) For NCCL peer-to-peer transfers, the EXCLUSIVE compute mode cannot be used. +4) For NCCL peer-to-peer transfers, the EXCLUSIVE compute mode cannot be used. It should be in DEFAULT mode. 5) NCCL support has been dropped from PyCUDA module, but can be used with CuPy module instead """ -from pkg_resources import parse_version +from packaging.version import parse import numpy as np from pycuda import gpuarray import pycuda.driver as cuda @@ -54,7 +54,7 @@ have_cuda_mpi = (mpi4py is not None) and \ "OMPI_MCA_opal_cuda_support" in os.environ and \ os.environ["OMPI_MCA_opal_cuda_support"] == "true" and \ - parse_version(parse_version(mpi4py.__version__).base_version) >= parse_version("3.1.0") and \ + parse(parse(mpi4py.__version__).base_version) >= parse("3.1.0") and \ hasattr(gpuarray.GPUArray, '__cuda_array_interface__') and \ not ('PTYPY_USE_MPI' in os.environ) @@ -97,7 +97,7 @@ def allReduceSum(self, arr): if parallel.MPIenabled: comm = parallel.comm comm.Allreduce(parallel.MPI.IN_PLACE, arr) - + # pick the appropriate communicator depending on installed packages def get_multi_gpu_communicator(use_cuda_mpi=True): diff --git a/ptypy/resources/__init__.py b/ptypy/resources/__init__.py index 50bab0b5e..becdece26 100644 --- a/ptypy/resources/__init__.py +++ b/ptypy/resources/__init__.py @@ -1,15 +1,17 @@ -import pkg_resources +from importlib.resources import files import numpy as np -flowerfile = pkg_resources.resource_filename(__name__,'flowers.png') -moonfile = pkg_resources.resource_filename(__name__,'moon.png') -treefile = pkg_resources.resource_filename(__name__,'tree.png') +_res_dir = files('ptypy.resources') +flowerfile = _res_dir / 'flowers.png' +moonfile = _res_dir / 'moon.png' +treefile = _res_dir / 'tree.png' + def flower_obj(shape=None): from ptypy import utils as u import numpy as np from matplotlib.image import imread - + im = u.rgb2complex(imread(flowerfile)) if shape is not None: sh = u.expect2(shape) @@ -37,7 +39,7 @@ def moon_pr(shape=None): from ptypy import utils as u import numpy as np from matplotlib.image import imread - + im = u.rgb2complex(imread(moonfile)) if shape is not None: sh = u.expect2(shape) diff --git a/pyproject.toml b/pyproject.toml index 4b35cf554..d3493bce4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] description = "Ptychography Reconstruction for Python" readme = "README.rst" -requires-python = ">=3.7" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Python :: 3", "Development Status :: 3 - Alpha", @@ -31,7 +31,7 @@ dependencies = [ [project.optional-dependencies] full = ["mpi4py","matplotlib","pyzmq","pillow", "pyfftw", "pyyaml"] cupy-cuda11x = ["mpi4py","matplotlib","pyzmq","pillow", "pyfftw", "cupy-cuda11x"] -cupy-cuda12x = ["mpi4py","matplotlib","pyzmq","pillow", "pyfftw", "cupy-cuda12x"] +cupy-cuda12x = ["mpi4py","matplotlib","pyzmq","pillow", "pyfftw", "cupy-cuda12x"] [project.scripts] "ptypy.plot" = "ptypy.cli.plotter:ptypy_plot" @@ -69,4 +69,4 @@ ptypy = "ptypy" [tool.setuptools.package-data] ptypy = ["resources/*",] -"ptypy.accelerate.cuda_common" = ["*.cu", "*.cuh"] \ No newline at end of file +"ptypy.accelerate.cuda_common" = ["*.cu", "*.cuh"] diff --git a/test/accelerate_tests/cuda_cupy_tests/multi_gpu_test.py b/test/accelerate_tests/cuda_cupy_tests/multi_gpu_test.py index 0c234d878..424c6696b 100644 --- a/test/accelerate_tests/cuda_cupy_tests/multi_gpu_test.py +++ b/test/accelerate_tests/cuda_cupy_tests/multi_gpu_test.py @@ -11,7 +11,7 @@ from ptypy.accelerate.cuda_cupy import multi_gpu as mgpu from ptypy.utils import parallel -from pkg_resources import parse_version +from packaging.version import parse class GpuDataTest(CupyCudaTest): """ @@ -36,8 +36,8 @@ def setUp(self): @unittest.skipIf(parallel.rank != 0, "Only in MPI rank 0") def test_version(self): - v1 = parse_version("3.1.0") - v2 = parse_version(parse_version("3.1.0a").base_version) + v1 = parse("3.1.0") + v2 = parse(parse("3.1.0a").base_version) self.assertGreaterEqual(v2, v1) @@ -61,7 +61,7 @@ def multigpu_tester(self, com): def test_multigpu_auto(self): self.multigpu_tester(mgpu.get_multi_gpu_communicator()) - + def test_multigpu_mpi(self): self.multigpu_tester(mgpu.MultiGpuCommunicatorMpi()) @@ -71,4 +71,4 @@ def test_multigpu_cudampi(self): @unittest.skipIf(not mgpu.have_nccl, "NCCL not available") def test_multigpu_nccl(self): - self.multigpu_tester(mgpu.MultiGpuCommunicatorNccl()) \ No newline at end of file + self.multigpu_tester(mgpu.MultiGpuCommunicatorNccl()) diff --git a/test/accelerate_tests/cuda_pycuda_tests/multi_gpu_test.py b/test/accelerate_tests/cuda_pycuda_tests/multi_gpu_test.py index be96aed54..6aa3ac537 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/multi_gpu_test.py +++ b/test/accelerate_tests/cuda_pycuda_tests/multi_gpu_test.py @@ -13,7 +13,7 @@ from ptypy.accelerate.cuda_pycuda import multi_gpu as mgpu from ptypy.utils import parallel -from pkg_resources import parse_version +from packaging.version import parse class GpuDataTest(unittest.TestCase): """ @@ -51,8 +51,8 @@ def tearDown(self): @unittest.skipIf(parallel.rank != 0, "Only in MPI rank 0") def test_version(self): - v1 = parse_version("3.1.0") - v2 = parse_version(parse_version("3.1.0a").base_version) + v1 = parse("3.1.0") + v2 = parse(parse("3.1.0a").base_version) self.assertGreaterEqual(v2, v1) From ac2563d6026c5944dd31308d50deadccfef649be Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Fri, 13 Dec 2024 18:08:14 +0000 Subject: [PATCH 02/12] Fix in compilation of filtered_fft, import from setuptools instead of distutils (#591) --- cufft/extensions.py | 5 +++-- cufft/setup.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cufft/extensions.py b/cufft/extensions.py index 4d8b7f598..468d8d1cb 100644 --- a/cufft/extensions.py +++ b/cufft/extensions.py @@ -4,8 +4,8 @@ import os, re import subprocess import sysconfig -from distutils.unixccompiler import UnixCCompiler -from distutils.command.build_ext import build_ext +from setuptools._distutils.unixccompiler import UnixCCompiler +from setuptools.command.build_ext import build_ext def find_in_path(name, path): @@ -116,6 +116,7 @@ def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): compiler_command = [self.CUDA["nvcc"]] + self.NVCC_FLAGS + self.OPTFLAGS + ["-Xcompiler"] + self.CXXFLAGS + CPPFLAGS compiler_exec = " ".join(compiler_command) self.set_executable('compiler_so', compiler_exec) + self.set_executable('compiler_so_cxx', compiler_exec) postargs = [] # we don't actually have any postargs super(NvccCompiler, self)._compile(obj, src, ext, cc_args, postargs, pp_opts) # the _compile method # reset the default compiler_so, which we might have changed for cuda diff --git a/cufft/setup.py b/cufft/setup.py index a8ef7c61e..b02262bdf 100644 --- a/cufft/setup.py +++ b/cufft/setup.py @@ -2,7 +2,7 @@ # we should aim to remove the distutils dependency import setuptools -from distutils.core import setup, Extension +from setuptools import setup, Extension import os ext_modules = [] From 021709f5c1fc0d428bb56bdc634cb6b29a908bb2 Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Fri, 13 Dec 2024 20:53:04 +0000 Subject: [PATCH 03/12] FFT filter changes with Numpy v2.0 (#592) * Include accelerate base tests by default * Change expected output in FFT filter tests to reflect support for single precision in Numpy > 2.0 * Adjust tolerance in pycuda fft filter tests --- pyproject.toml | 1 + .../base_tests/array_utils_test.py | 142 +++++++++--------- .../cuda_pycuda_tests/array_utils_test.py | 12 +- 3 files changed, 78 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d3493bce4..3f4b0c9b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ testpaths = [ "test/ptyscan_tests", "test/template_tests", "test/util_tests", + "test/accelerate_tests/base_tests" ] # this is all BETA according to setuptools diff --git a/test/accelerate_tests/base_tests/array_utils_test.py b/test/accelerate_tests/base_tests/array_utils_test.py index 364352a5c..78540f978 100644 --- a/test/accelerate_tests/base_tests/array_utils_test.py +++ b/test/accelerate_tests/base_tests/array_utils_test.py @@ -305,42 +305,42 @@ def test_fft_filter(self): kernel = np.fft.fftn(rk) output = au.fft_filter(data, kernel, prefactor, postfactor) + + known_test_output = np.array([-0.0000000e+00+0.00000000e+00j, -0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00-0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + -0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + -0.0000000e+00+0.00000000e+00j, -0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00-0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00+0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 0.0000000e+00-0.00000000e+00j, + 0.0000000e+00+0.00000000e+00j, 6.1097220e-05+2.92563982e-05j, + 4.0044695e-05+2.52102855e-05j, 8.9999994e+02+9.00000000e+02j, + 8.9999988e+02+8.99999939e+02j, 5.0999994e+02+5.09999939e+02j, + 1.9365043e-05+5.84280206e-05j, 3.0681291e-05+2.31116355e-05j, + 1.2552022e-05-1.01537153e-05j, -1.4034913e-05-1.17988075e-05j, + -1.9193330e-05+2.72889110e-07j, 1.3895768e-05+1.64778357e-05j, + 6.5228807e-05+2.45708943e-05j, 3.8999994e+02+3.89999939e+02j, + 8.9999988e+02+8.99999878e+02j, 8.9999982e+02+8.99999878e+02j, + 3.0000015e+01+3.00000248e+01j, 3.8863189e-05+3.26705631e-05j, + 2.8768281e-06-1.62116921e-05j, -3.2418033e-05-1.97073969e-05j, + -6.6843757e-05+7.19546824e-06j, 6.5036993e-06+3.95851657e-06j, + -2.4053887e-05+9.88548163e-06j, 1.5231475e-05+1.31202614e-06j, + 8.7000000e+01+8.70000305e+01j, 6.1035156e-05+0.00000000e+00j, + 6.1035156e-05+0.00000000e+00j, -2.4943074e-07+6.62429193e-06j, + 1.6712515e-06-2.97475322e-06j, 1.9025241e-05+2.97752194e-07j, + -9.2436176e-07-3.86252796e-05j, -8.8145862e-06-9.89961700e-06j, + -1.5782407e-06+1.01533060e-05j, -4.7593076e-06+2.96332291e-05j]) - known_test_output = np.array([-0.00000000e+00+0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - -0.00000000e+00 + 0.00000000e+00j, 0.00000000e+00 - 0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, -0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - -0.00000000e+00+0.00000000e+00j, -0.00000000e+00+0.00000000e+00j, - -0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 8.66422277e-14+4.86768828e-14j, - 7.23113320e-14+2.82331542e-14j, 9.00000000e+02+9.00000000e+02j, - 9.00000000e+02+9.00000000e+02j, 5.10000000e+02+5.10000000e+02j, - 1.41172830e-14+3.62223425e-14j, 2.61684238e-14-4.13866575e-14j, - 2.16691314e-14-1.95102733e-14j, -1.36536942e-13-9.94589021e-14j, - -1.42905371e-13-5.77964697e-14j, -5.00005072e-14+4.08620637e-14j, - 6.38160272e-14+7.61753583e-14j, 3.90000000e+02+3.90000000e+02j, - 9.00000000e+02+9.00000000e+02j, 9.00000000e+02+9.00000000e+02j, - 3.00000000e+01+3.00000000e+01j, 8.63255773e-14+7.08532924e-14j, - 1.80941313e-14-3.85517154e-14j, 7.84277340e-14-1.32008745e-14j, - -6.57025196e-14-1.72739350e-14j, -6.69570857e-15+6.49622898e-14j, - 6.27436466e-15+7.57162569e-14j, 2.01150157e-15+3.65538558e-14j, - 8.70000000e+01+8.70000000e+01j, -1.13686838e-13-1.70530257e-13j, - 0.00000000e+00-2.27373675e-13j, -1.84492121e-14-9.21502853e-14j, - 2.12418687e-14-8.62209232e-14j, 1.20880692e-13+3.86522371e-14j, - 1.03754734e-13+9.19851759e-14j, 5.50926123e-14+1.17150422e-13j, - -5.47869215e-14+5.87176511e-14j, -3.52652980e-14+8.44455504e-15j]) - - np.testing.assert_array_almost_equal(output.flat[::2000], known_test_output) + np.testing.assert_array_almost_equal(output.flat[::2000], known_test_output, decimal=5) def test_fft_filter_batched(self): data = np.zeros((2,256, 512), dtype=COMPLEX_TYPE) @@ -356,42 +356,42 @@ def test_fft_filter_batched(self): kernel = np.fft.fftn(rk) output = au.fft_filter(data, kernel, prefactor, postfactor) - - known_test_output = np.array([-0.00000000e+00+0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - -0.00000000e+00 + 0.00000000e+00j, 0.00000000e+00 - 0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, -0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - -0.00000000e+00+0.00000000e+00j, -0.00000000e+00+0.00000000e+00j, - -0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00+0.00000000e+00j, - 0.00000000e+00+0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 0.00000000e+00-0.00000000e+00j, - 0.00000000e+00-0.00000000e+00j, 8.66422277e-14+4.86768828e-14j, - 7.23113320e-14+2.82331542e-14j, 9.00000000e+02+9.00000000e+02j, - 9.00000000e+02+9.00000000e+02j, 5.10000000e+02+5.10000000e+02j, - 1.41172830e-14+3.62223425e-14j, 2.61684238e-14-4.13866575e-14j, - 2.16691314e-14-1.95102733e-14j, -1.36536942e-13-9.94589021e-14j, - -1.42905371e-13-5.77964697e-14j, -5.00005072e-14+4.08620637e-14j, - 6.38160272e-14+7.61753583e-14j, 3.90000000e+02+3.90000000e+02j, - 9.00000000e+02+9.00000000e+02j, 9.00000000e+02+9.00000000e+02j, - 3.00000000e+01+3.00000000e+01j, 8.63255773e-14+7.08532924e-14j, - 1.80941313e-14-3.85517154e-14j, 7.84277340e-14-1.32008745e-14j, - -6.57025196e-14-1.72739350e-14j, -6.69570857e-15+6.49622898e-14j, - 6.27436466e-15+7.57162569e-14j, 2.01150157e-15+3.65538558e-14j, - 8.70000000e+01+8.70000000e+01j, -1.13686838e-13-1.70530257e-13j, - 0.00000000e+00-2.27373675e-13j, -1.84492121e-14-9.21502853e-14j, - 2.12418687e-14-8.62209232e-14j, 1.20880692e-13+3.86522371e-14j, - 1.03754734e-13+9.19851759e-14j, 5.50926123e-14+1.17150422e-13j, - -5.47869215e-14+5.87176511e-14j, -3.52652980e-14+8.44455504e-15j]) - - np.testing.assert_array_almost_equal(output[1].flat[::2000], known_test_output) + + known_test_output = np.array([ 0.00000000e+00-0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + -0.00000000e+00+0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00-0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00-0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00+0.0000000e+00j, + 0.00000000e+00+0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00-0.0000000e+00j, 0.00000000e+00-0.0000000e+00j, + 0.00000000e+00-0.0000000e+00j, 4.86995195e-05-9.1511911e-06j, + 5.89395277e-05+3.6706428e-05j, 8.99999817e+02+9.0000000e+02j, + 8.99999817e+02+9.0000000e+02j, 5.09999969e+02+5.0999997e+02j, + 6.86399580e-05+5.5245564e-05j, -2.15578075e-06-8.0761157e-07j, + -5.99612467e-05-3.7489859e-05j, -2.08058154e-05-1.7001423e-05j, + -3.15661709e-05-2.0192698e-05j, -1.17410173e-05-2.3929812e-05j, + 8.41844594e-05+4.9635066e-05j, 3.90000031e+02+3.9000003e+02j, + 8.99999817e+02+8.9999994e+02j, 8.99999817e+02+8.9999994e+02j, + 3.00000153e+01+3.0000000e+01j, 4.75842753e-05+1.7961407e-05j, + -1.28229876e-05-3.3492659e-05j, -1.50405585e-05+3.0159079e-05j, + -1.00799960e-04-6.6932058e-05j, -4.90295024e-05-3.6601130e-05j, + -4.48861247e-05-1.4717044e-05j, 2.60417364e-05-8.3221821e-06j, + 8.69999847e+01+8.7000046e+01j, 4.31583721e-05+4.3158372e-05j, + 4.31583721e-05+4.3158372e-05j, 4.04649109e-06-1.6836095e-05j, + 1.37377283e-05+5.2577798e-06j, -2.30404657e-05-3.4596611e-05j, + -1.33214944e-05-3.2517899e-05j, 2.45428764e-05-3.5186855e-07j, + -1.85950885e-05-2.1921931e-05j, -1.65030433e-05-8.0249208e-07j]) + + np.testing.assert_array_almost_equal(output[1].flat[::2000], known_test_output, decimal=5) def test_complex_gaussian_filter_fft(self): diff --git a/test/accelerate_tests/cuda_pycuda_tests/array_utils_test.py b/test/accelerate_tests/cuda_pycuda_tests/array_utils_test.py index be82cc292..d11584b01 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/array_utils_test.py +++ b/test/accelerate_tests/cuda_pycuda_tests/array_utils_test.py @@ -580,7 +580,7 @@ def test_fft_filter_UNITY(self): output = au.fft_filter(data, kernel, prefactor, postfactor) - np.testing.assert_allclose(output, data_dev.get(), rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(output, data_dev.get(), rtol=1e-5, atol=1e-5) def test_fft_filter_batched_UNITY(self): sh = (2,16, 35) @@ -607,7 +607,7 @@ def test_fft_filter_batched_UNITY(self): output = au.fft_filter(data, kernel, prefactor, postfactor) print(data_dev.get()) - np.testing.assert_allclose(output, data_dev.get(), rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(output, data_dev.get(), rtol=1e-5, atol=1e-5) def test_complex_gaussian_filter_fft_little_blurring_UNITY(self): # Arrange @@ -624,7 +624,7 @@ def test_complex_gaussian_filter_fft_little_blurring_UNITY(self): out_exp = au.complex_gaussian_filter_fft(data, mfs) out = data_dev.get() - np.testing.assert_allclose(out_exp, out, atol=1e-6) + np.testing.assert_allclose(out_exp, out, atol=1e-5) def test_complex_gaussian_filter_fft_more_blurring_UNITY(self): # Arrange @@ -641,7 +641,7 @@ def test_complex_gaussian_filter_fft_more_blurring_UNITY(self): out_exp = au.complex_gaussian_filter_fft(data, mfs) out = data_dev.get() - np.testing.assert_allclose(out_exp, out, atol=1e-6) + np.testing.assert_allclose(out_exp, out, atol=1e-5) def test_complex_gaussian_filter_fft_nonsquare_UNITY(self): # Arrange @@ -660,7 +660,7 @@ def test_complex_gaussian_filter_fft_nonsquare_UNITY(self): out_exp = au.complex_gaussian_filter_fft(data, mfs) out = data_dev.get() - np.testing.assert_allclose(out_exp, out, atol=1e-6) + np.testing.assert_allclose(out_exp, out, atol=1e-5) def test_complex_gaussian_filter_fft_batched(self): # Arrange @@ -680,4 +680,4 @@ def test_complex_gaussian_filter_fft_batched(self): out_exp = au.complex_gaussian_filter_fft(data, mfs) out = data_dev.get() - np.testing.assert_allclose(out_exp, out, atol=1e-6) \ No newline at end of file + np.testing.assert_allclose(out_exp, out, atol=1e-5) \ No newline at end of file From 89de9cc125f3ff3a095dc0d086f41b1bdef557d0 Mon Sep 17 00:00:00 2001 From: Timothy Poon <62692924+ptim0626@users.noreply.github.com> Date: Thu, 6 Mar 2025 16:50:43 +0000 Subject: [PATCH 04/12] Control the start of probe update in WASP (#601) * Use probe_update_start to control the probe update in WASP * Add WASP tests with probe_update_start * Update WASP publication * Remove get_cmap as suggested in https://github.com/matplotlib/matplotlib/issues/20853 * Handle colormap registration in different versions of matplotlib * Skip FFT filter tests if Numpy < 2.0 --------- Co-authored-by: Benedikt Daurer --- ptypy/custom/WASP.py | 28 ++++++----- ptypy/custom/WASP_cupy.py | 8 ++-- ptypy/custom/WASP_pycuda.py | 9 ++-- ptypy/custom/WASP_serial.py | 8 ++-- ptypy/utils/plot_utils.py | 48 ++++++++++--------- .../accelerate_tests/base_tests/WASP_tests.py | 17 +++++++ .../base_tests/array_utils_test.py | 5 +- .../cuda_cupy_tests/WASP_tests.py | 17 +++++++ .../cuda_pycuda_tests/WASP_tests.py | 17 +++++++ 9 files changed, 111 insertions(+), 46 deletions(-) diff --git a/ptypy/custom/WASP.py b/ptypy/custom/WASP.py index 275224b9a..3ef84bb5c 100644 --- a/ptypy/custom/WASP.py +++ b/ptypy/custom/WASP.py @@ -124,11 +124,11 @@ def __init__(self, ptycho_parent, pars=None): self.article = dict( title='WASP: Weighted Average of Sequential Projections for ptychographic phase retrieval', author='A. M. Maiden, W. Mei and P. Li', - journal='Optica', - volume=42, + journal='Optics Express', + volume=32, year=2024, - page=42, - doi='doi', + page=21327, + doi='10.1364/OE.516946', comment='Weighted Average of Sequential Projections', ) self.ptycho.citations.add_article(**self.article) @@ -237,7 +237,8 @@ def overlap_update(self): # update object first, then probe, and accumulate their sum for # averaging after going through all the views self.object_update(view, ex_old) - self.probe_update(view, ex_old, ob_old) + if self.p.probe_update_start <= self.curiter: + self.probe_update(view, ex_old, ob_old) # WASP self.wasp_averaging() @@ -323,16 +324,17 @@ def wasp_averaging(self): self.clip_object(s) - for name, p in self.pr.storages.items(): - pr_sum_nmr = self.pr_sum_nmr.storages[name].data - pr_sum_dnm = self.pr_sum_dnm.storages[name].data + if self.p.probe_update_start <= self.curiter: + for name, p in self.pr.storages.items(): + pr_sum_nmr = self.pr_sum_nmr.storages[name].data + pr_sum_dnm = self.pr_sum_dnm.storages[name].data - parallel.allreduce(pr_sum_nmr) - parallel.allreduce(pr_sum_dnm) + parallel.allreduce(pr_sum_nmr) + parallel.allreduce(pr_sum_dnm) - # avoid division by zero - is_zero = np.isclose(pr_sum_dnm, 0) - p.data = np.where(is_zero, pr_sum_nmr, pr_sum_nmr / pr_sum_dnm) + # avoid division by zero + is_zero = np.isclose(pr_sum_dnm, 0) + p.data = np.where(is_zero, pr_sum_nmr, pr_sum_nmr / pr_sum_dnm) def clip_object(self, ob): """Copied from _ProjectionEngine diff --git a/ptypy/custom/WASP_cupy.py b/ptypy/custom/WASP_cupy.py index 06d1507fb..c23448c49 100644 --- a/ptypy/custom/WASP_cupy.py +++ b/ptypy/custom/WASP_cupy.py @@ -338,8 +338,9 @@ def engine_iterate(self, num=1): ob_old = ob.copy() POK.ob_update_wasp(addr, ob, pr, ex, aux, ob_sum_nmr, ob_sum_dnm, alpha=self.p.alpha) - POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, - pr_sum_dnm, beta=self.p.beta) + if self.p.probe_update_start <= self.curiter: + POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, + pr_sum_dnm, beta=self.p.beta) ## compute log-likelihood if self.p.compute_log_likelihood: @@ -355,7 +356,8 @@ def engine_iterate(self, num=1): self.multigpu.allReduceSum(pr_sum_dnm) POK.avg_wasp(ob, ob_sum_nmr, ob_sum_dnm) - POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) + if self.p.probe_update_start <= self.curiter: + POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) # Clip object if self.p.clip_object is not None: diff --git a/ptypy/custom/WASP_pycuda.py b/ptypy/custom/WASP_pycuda.py index e96bce1b5..e631e855a 100644 --- a/ptypy/custom/WASP_pycuda.py +++ b/ptypy/custom/WASP_pycuda.py @@ -338,9 +338,9 @@ def engine_iterate(self, num=1): ob_old = ob.copy() POK.ob_update_wasp(addr, ob, pr, ex, aux, ob_sum_nmr, ob_sum_dnm, alpha=self.p.alpha) - POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, - pr_sum_dnm, beta=self.p.beta) - + if self.p.probe_update_start <= self.curiter: + POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, + pr_sum_dnm, beta=self.p.beta) ## compute log-likelihood if self.p.compute_log_likelihood: @@ -356,7 +356,8 @@ def engine_iterate(self, num=1): self.multigpu.allReduceSum(pr_sum_dnm) POK.avg_wasp(ob, ob_sum_nmr, ob_sum_dnm) - POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) + if self.p.probe_update_start <= self.curiter: + POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) # Clip object if self.p.clip_object is not None: diff --git a/ptypy/custom/WASP_serial.py b/ptypy/custom/WASP_serial.py index 96391ea27..ccd961b2a 100644 --- a/ptypy/custom/WASP_serial.py +++ b/ptypy/custom/WASP_serial.py @@ -292,8 +292,9 @@ def engine_iterate(self, num=1): ob_old = ob.copy() POK.ob_update_wasp(addr, ob, pr, ex, aux, ob_sum_nmr, ob_sum_dnm, alpha=self.p.alpha) - POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, - pr_sum_dnm, beta=self.p.beta) + if self.p.probe_update_start <= self.curiter: + POK.pr_update_wasp(addr, pr, ob_old, ex, aux, pr_sum_nmr, + pr_sum_dnm, beta=self.p.beta) self.benchmark.wasp_ob_pr_update += time.time() - t1 self.benchmark.calls_wasp_ob_pr_update += 1 @@ -321,7 +322,8 @@ def engine_iterate(self, num=1): parallel.allreduce(pr_sum_dnm) POK.avg_wasp(ob, ob_sum_nmr, ob_sum_dnm) - POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) + if self.p.probe_update_start <= self.curiter: + POK.avg_wasp(pr, pr_sum_nmr, pr_sum_dnm) self.benchmark.wasp_averaging += time.time() - t1 self.benchmark.calls_wasp_averaging += 1 diff --git a/ptypy/utils/plot_utils.py b/ptypy/utils/plot_utils.py index 2f85e72b5..a4a81a596 100644 --- a/ptypy/utils/plot_utils.py +++ b/ptypy/utils/plot_utils.py @@ -128,7 +128,7 @@ def pause(timeout=-1, message=None): print(message) time.sleep(timeout) -# BD: With version 9.1.0 of PIL, _MODE_CONV has been removed, +# BD: With version 9.1.0 of PIL, _MODE_CONV has been removed, # see here: https://github.com/python-pillow/Pillow/pull/6057 # can't see a reason why this is still needed, therefore commenting it out # FIXME: Is this still needed? @@ -179,7 +179,7 @@ def complex2hsv(cin, vmin=0., vmax=None): else: assert vmin < vmax v = (v.clip(vmin, vmax)-vmin)/(vmax-vmin) - + return np.asarray((h, s, v)) @@ -345,7 +345,7 @@ def imsave(a, filename=None, vmin=None, vmax=None, cmap=None): uses a matplotlib colormap with name 'gray' """ if str(cmap) == cmap: - cmap = mpl.colormaps.get_cmap(cmap) + cmap = mpl.colormaps[cmap] if a.dtype.kind == 'c': i = complex2rgb(a, vmin=vmin, vmax=vmax) @@ -415,8 +415,12 @@ def imload(filename): (0.340, 1, 1), (0.650, 0, 0), (1.000, 0, 0))} - -mpl.colormaps.register(cmap=LinearSegmentedColormap(name='franzmap', segmentdata=franzmap_cm, N=256)) + +_lscm = LinearSegmentedColormap(name='franzmap', segmentdata=franzmap_cm, N=256) +try: + mpl.colormaps.register(cmap=_lscm) +except AttributeError: + matplotlib.cm.register_cmap(cmap=_lscm) def franzmap(): """\ @@ -426,7 +430,7 @@ def franzmap(): im = mpl.pyplot.gci() if im is not None: - im.set_cmap(matplotlib.cm.get_cmap('franzmap')) + im.set_cmap(mpl.colormaps['franzmap']) mpl.pyplot.draw_if_interactive() @@ -595,46 +599,46 @@ class PtyAxis(object): """ Plot environment for matplotlib to allow for a Image plot with color axis, potentially of a potentially complex array. - + Please note that this class may undergo changes or become obsolete altogether. """ def __init__(self, ax=None, data=None, channel='r', cmap=None, fontsize=8, **kwargs): """ - + Parameters ---------- - + ax : pyplot.axis An axis in matplotlib figure. If ``None`` a figure with a single axis will be created. - + data : numpy.ndarray - The (complex) twodimensional data to be displayed. - + The (complex) twodimensional data to be displayed. + channel : str Choose - + - ``'a'`` to plot absolute/modulus value of the data, - ``'p'`` to plot phase value of the data, - ``'a'`` to plot real value of the data, - ``'a'`` to plot imaginary value of the data, - - ``'a'`` to plot a composite image where phase channel maps to hue and + - ``'a'`` to plot a composite image where phase channel maps to hue and modulus channel maps to brightness of the color. - - cmap : str + + cmap : str String representation of one of matplotlibs colormaps. - + fontsize : int Base font size of labels, etc. - + Keyword Arguments ----------------- vmin, vmax : float Minimum and maximum value for colormap scaling - + rmramp : bool Remove phase ramp if ``True``, default is ``False`` - + """ if ax is None: fig = plt.figure() @@ -679,10 +683,10 @@ def set_channel(self, channel, update=True): def set_cmap(self, cmap, update=True): try: - self.cmap = mpl.colormaps.get_cmap(cmap) + self.cmap = mpl.colormaps[cmap] except: logger.debug("Colormap `%s` not found. Using `gray`" % str(cmap)) - self.cmap = mpl.colormaps.get_cmap('gray') + self.cmap = mpl.colormaps['gray'] if update: self._update() self._update_colorscale() diff --git a/test/accelerate_tests/base_tests/WASP_tests.py b/test/accelerate_tests/base_tests/WASP_tests.py index a0ea4514e..5cf73982c 100644 --- a/test/accelerate_tests/base_tests/WASP_tests.py +++ b/test/accelerate_tests/base_tests/WASP_tests.py @@ -99,6 +99,7 @@ def test_WASP_serial_base(self): engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.random_seed = 721 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) @@ -120,12 +121,28 @@ def test_WASP_serial_clip(self): if parallel.master: self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_serial_probe_start(self): + out = [] + for eng in ["WASP", "WASP_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 10 + engine_params.clip_object = (0, 2) + engine_params.probe_update_start = 4 + engine_params.random_seed = 721 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + + if parallel.master: + self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_serial_alpha_beta(self): out = [] for eng in ["WASP", "WASP_serial"]: engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.alpha = 0.64 engine_params.beta = 0.94 engine_params.random_seed = 721 diff --git a/test/accelerate_tests/base_tests/array_utils_test.py b/test/accelerate_tests/base_tests/array_utils_test.py index 78540f978..4d8081903 100644 --- a/test/accelerate_tests/base_tests/array_utils_test.py +++ b/test/accelerate_tests/base_tests/array_utils_test.py @@ -2,11 +2,12 @@ Tests for the array_utils module ''' -import unittest +import unittest, pytest import numpy as np from ptypy.accelerate.base import FLOAT_TYPE, COMPLEX_TYPE from ptypy.accelerate.base import array_utils as au +_numpy_version = tuple(map(int, np.__version__.split("."))) class ArrayUtilsTest(unittest.TestCase): @@ -291,6 +292,7 @@ def test_crop_pad_3(self): dtype=np.complex64) np.testing.assert_array_almost_equal(A, exp_A) + @pytest.mark.skipif(_numpy_version < (2,0), reason="requires Numpy 2.0 or higher to match expected output") def test_fft_filter(self): data = np.zeros((256, 512), dtype=COMPLEX_TYPE) data[64:-64,128:-128] = 1 + 1.j @@ -342,6 +344,7 @@ def test_fft_filter(self): np.testing.assert_array_almost_equal(output.flat[::2000], known_test_output, decimal=5) + @pytest.mark.skipif(_numpy_version < (2,0), reason="requires Numpy 2.0 or higher to match expected output") def test_fft_filter_batched(self): data = np.zeros((2,256, 512), dtype=COMPLEX_TYPE) data[:,64:-64,128:-128] = 1 + 1.j diff --git a/test/accelerate_tests/cuda_cupy_tests/WASP_tests.py b/test/accelerate_tests/cuda_cupy_tests/WASP_tests.py index 7ccda4930..91bc0a1fc 100644 --- a/test/accelerate_tests/cuda_cupy_tests/WASP_tests.py +++ b/test/accelerate_tests/cuda_cupy_tests/WASP_tests.py @@ -98,6 +98,7 @@ def test_WASP_cupy_base(self): engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.random_seed = 721 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) @@ -119,12 +120,28 @@ def test_WASP_cupy_clip(self): if parallel.master: self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_cupy_probe_start(self): + out = [] + for eng in ["WASP_serial", "WASP_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 10 + engine_params.clip_object = (0, 2) + engine_params.probe_update_start = 4 + engine_params.random_seed = 721 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + + if parallel.master: + self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_cupy_alpha_beta(self): out = [] for eng in ["WASP_serial", "WASP_cupy"]: engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.alpha = 0.64 engine_params.beta = 0.94 engine_params.random_seed = 721 diff --git a/test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py b/test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py index f8bf5f0c8..e8527481d 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py +++ b/test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py @@ -98,6 +98,7 @@ def test_WASP_pycuda_base(self): engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.random_seed = 721 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) @@ -119,12 +120,28 @@ def test_WASP_pycuda_clip(self): if parallel.master: self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_pycuda_probe_start(self): + out = [] + for eng in ["WASP_serial", "WASP_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 10 + engine_params.clip_object = (0, 2) + engine_params.probe_update_start = 4 + engine_params.random_seed = 721 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + + if parallel.master: + self.check_engine_output(out, plotting=False, debug=False) + def test_WASP_pycuda_alpha_beta(self): out = [] for eng in ["WASP_serial", "WASP_pycuda"]: engine_params = u.Param() engine_params.name = eng engine_params.numiter = 10 + engine_params.clip_object = (0, 2) engine_params.alpha = 0.64 engine_params.beta = 0.94 engine_params.random_seed = 721 From 9b18a2e29bc319ca1f025bb2a2a18de58d14024b Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Fri, 28 Mar 2025 19:39:42 +0000 Subject: [PATCH 05/12] Renamed engine tests to be picked up by pytest (#597) * Renamed engine tests to be picked up by pytest * Fix parsing of sigma in smoothing regulariser * Disable smoothing regulariser test * Disable CI hack for pyfftw and Python 3.12 --- .github/workflows/test.yml | 6 +++--- ptypy/accelerate/base/array_utils.py | 18 ++++++++++++------ ptypy/engines/ML.py | 2 +- .../base_tests/{WASP_tests.py => WASP_test.py} | 0 .../{engine_tests.py => engine_test.py} | 6 ++++-- .../{WASP_tests.py => WASP_test.py} | 0 .../{WASP_tests.py => WASP_test.py} | 0 7 files changed, 20 insertions(+), 12 deletions(-) rename test/accelerate_tests/base_tests/{WASP_tests.py => WASP_test.py} (100%) rename test/accelerate_tests/base_tests/{engine_tests.py => engine_test.py} (96%) rename test/accelerate_tests/cuda_cupy_tests/{WASP_tests.py => WASP_test.py} (100%) rename test/accelerate_tests/cuda_pycuda_tests/{WASP_tests.py => WASP_test.py} (100%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b37bc8c30..18aff26cf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,9 +52,9 @@ jobs: run: | # replace python version in dependencies sed -i 's/python/python=${{ matrix.python-version }}/' dependencies_${{ matrix.conda-env }}.yml - if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.12' ]; then - sed -i '/- pyfftw/d' dependencies_${{ matrix.conda-env }}.yml - fi + # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.12' ]; then + # sed -i '/- pyfftw/d' dependencies_${{ matrix.conda-env }}.yml + # fi # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.9' ]; then # sed -i '/- mpi4py/d' dependencies_${{ matrix.conda-env }}.yml # fi diff --git a/ptypy/accelerate/base/array_utils.py b/ptypy/accelerate/base/array_utils.py index bec4cc8e4..5af588413 100644 --- a/ptypy/accelerate/base/array_utils.py +++ b/ptypy/accelerate/base/array_utils.py @@ -70,14 +70,20 @@ def complex_gaussian_filter(input, mfs): ''' takes 2D and 3D arrays. Complex input, complex output. mfs has len 0 2: + if isinstance(mfs, list) and len(mfs) > 2: raise NotImplementedError("Only batches of 2D arrays allowed!") + if isinstance(mfs, (int, float)): + mfs_list = [mfs, mfs] + if input.ndim == 3: + mfs_list = [0, mfs, mfs] + elif isinstance(mfs, (list,tuple)): + mfs_list = list(mfs) + if input.ndim == 3: + mfs_list.insert(0,0) + else: + raise NotImplementedError(f"Cannot interpret mfs of type {type(mfs)}") - if input.ndim == 3: - mfs = np.insert(mfs, 0, 0) - - return (ndi.gaussian_filter(np.real(input), mfs) + 1j * ndi.gaussian_filter(np.imag(input), mfs)).astype( - input.dtype) + return (ndi.gaussian_filter(np.real(input), mfs_list).astype(input.dtype) + 1j * ndi.gaussian_filter(np.imag(input), mfs_list)) def complex_gaussian_filter_fft(input, mfs): diff --git a/ptypy/engines/ML.py b/ptypy/engines/ML.py index 9d5bb7d1a..b5c93dfbe 100644 --- a/ptypy/engines/ML.py +++ b/ptypy/engines/ML.py @@ -1290,7 +1290,7 @@ def __init__(self, sigma): self.sigma = sigma def __call__(self, x): - return u.c_gf(x, [0, self.sigma, self.sigma]) + return u.c_gf(x, [0, self.sigma, self.sigma]) # from scipy.signal import correlate2d # class HannFilt: diff --git a/test/accelerate_tests/base_tests/WASP_tests.py b/test/accelerate_tests/base_tests/WASP_test.py similarity index 100% rename from test/accelerate_tests/base_tests/WASP_tests.py rename to test/accelerate_tests/base_tests/WASP_test.py diff --git a/test/accelerate_tests/base_tests/engine_tests.py b/test/accelerate_tests/base_tests/engine_test.py similarity index 96% rename from test/accelerate_tests/base_tests/engine_tests.py rename to test/accelerate_tests/base_tests/engine_test.py index dbfc1e9f6..855536993 100644 --- a/test/accelerate_tests/base_tests/engine_tests.py +++ b/test/accelerate_tests/base_tests/engine_test.py @@ -14,6 +14,7 @@ import tempfile import shutil import numpy as np +import pytest class MLSerialTest(unittest.TestCase): @@ -68,9 +69,9 @@ def check_engine_output(self, output, plotting=False, debug=False): RMSE_ob = (np.mean(np.abs(OBJ_ML_serial - OBJ_ML)**2)) RMSE_pr = (np.mean(np.abs(PRB_ML_serial - PRB_ML)**2)) # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-1, err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-1, err_msg="The object arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") @@ -134,6 +135,7 @@ def test_ML_serial_floating(self): scanmodel="BlockFull", autosave=False, verbose_level="critical")) self.check_engine_output(out, plotting=False, debug=False) + @pytest.mark.skip(reason="Funny behaviour with this test, most likely related to Gaussian filter, see issue #607") def test_ML_serial_smoothing_regularizer(self): out = [] for eng in ["ML", "ML_serial"]: diff --git a/test/accelerate_tests/cuda_cupy_tests/WASP_tests.py b/test/accelerate_tests/cuda_cupy_tests/WASP_test.py similarity index 100% rename from test/accelerate_tests/cuda_cupy_tests/WASP_tests.py rename to test/accelerate_tests/cuda_cupy_tests/WASP_test.py diff --git a/test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py b/test/accelerate_tests/cuda_pycuda_tests/WASP_test.py similarity index 100% rename from test/accelerate_tests/cuda_pycuda_tests/WASP_tests.py rename to test/accelerate_tests/cuda_pycuda_tests/WASP_test.py From d1ac423a5e36f497210f0633e0dd000200e05be2 Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Mon, 28 Apr 2025 15:38:31 +0100 Subject: [PATCH 06/12] Ignore F824 for linting in actions (#608) --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 18aff26cf..e71bda774 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -69,7 +69,7 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --ignore=F824 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest From 7e5abf6df0344496c33a1c557b7f6e40efc296d5 Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Mon, 28 Apr 2025 16:59:09 +0100 Subject: [PATCH 07/12] Cleaning up actions (#609) * Cleaning up actions * Make sure to use miniforge --- .github/workflows/test.yml | 46 +++++--------------------------------- 1 file changed, 5 insertions(+), 41 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e71bda774..1567443c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,55 +48,19 @@ jobs: channels: conda-forge activate-environment: ptypy_env python-version: ${{ matrix.python-version }} + miniforge-version: latest - name: Install ${{ matrix.conda-env }} dependencies run: | - # replace python version in dependencies sed -i 's/python/python=${{ matrix.python-version }}/' dependencies_${{ matrix.conda-env }}.yml - # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.12' ]; then - # sed -i '/- pyfftw/d' dependencies_${{ matrix.conda-env }}.yml - # fi - # if [ ${{ matrix.conda-env }} == 'full' ] && [ ${{ matrix.python-version }} == '3.9' ]; then - # sed -i '/- mpi4py/d' dependencies_${{ matrix.conda-env }}.yml - # fi - conda install --solver=classic mpich + conda install -c conda-forge mpich conda env update --file dependencies_${{ matrix.conda-env }}.yml --name ptypy_env - conda install --solver=classic flake8 pytest pytest-cov + conda install -c conda-forge flake8 pytest pytest-cov conda list - name: Prepare ptypy - run: | - # Install ptypy - pip install . + run: pip install . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --ignore=F824 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest - run: | - # pytest ptypy/test -v --doctest-modules --junitxml=junit/test-results.xml --cov=ptypy --cov-report=xml --cov-report=html --cov-config=.coveragerc - pytest -v -# - name: cobertura-report -# if: github.event_name == 'pull_request' && (github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'synchronize') -# uses: 5monkeys/cobertura-action@v7 -# with: -# # The GITHUB_TOKEN for this repo -# repo_token: ${{ secrets.GITHUB_TOKEN }} -# # Path to the cobertura file. -# path: coverage.xml -# # If files with 100% should be skipped from report. -# skip_covered: true -# # Minimum allowed coverage percentage as an integer. -# minimum_coverage: 90 -# only_changed_files: true -# - name: Junit Report to Annotations -# uses: ashley-taylor/junit-report-annotations-action@master -# with: -# # github token -# access-token: ${{ secrets.GITHUB_TOKEN }} -# # glob to junit xml files -# path: junit/test-results.xml -# # include summary annotation -# includeSummary: true -# # max number of failed tests to include -# numFailures: 10 + run: pytest -v From 45dd0c0bd3dc0e4c9920b6fb8afa36cfaad080ea Mon Sep 17 00:00:00 2001 From: Benedikt Daurer Date: Mon, 28 Apr 2025 19:30:43 +0100 Subject: [PATCH 08/12] Bump version to 0.9.0 --- ptypy/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ptypy/version.py b/ptypy/version.py index 16317abfa..0ae9661c4 100644 --- a/ptypy/version.py +++ b/ptypy/version.py @@ -1,6 +1,6 @@ -short_version = '0.8.2' -version = '0.8.2' +short_version = '0.9.0' +version = '0.9.0' release = False if not release: From 205248a954e4acc519dba744428372999367a4ee Mon Sep 17 00:00:00 2001 From: Jari Date: Mon, 28 Apr 2025 19:55:36 +0100 Subject: [PATCH 09/12] Add wavefield preconditioner (#584) * Add Lipschitz preconditioner * Move Lipschitz preconditioner before smoothing preconditioner * Use sqrt of fluence and apply twice * Take sqrt of the sum * Start serializing wavefield precond * Make requested changes to ML.py * Make changes to ML_serial and add kernel functions * Fix smoothing call in ML_serial * Fix abs2 call in kernels * Add kernel regression tests * Tidy up new kernel tests * Fix new kernel tests * Changes to ML_cupy and ML_pycuda * Fix pycuda.cumath import * Add cupy and pycuda kernels * Add serial wavefield precond test * Add cupy and pycuda wavefield precond tests * Add cupy and pycuda kernel regression tests * Add atomic CUDA kernels * Fix types in CUDA kernels * Add non-atomic CUDA kernels * Bugfixes in wavefield kernels * Better abs2 in wavefield kernels * Fixed wavefield kernels and tests * Revert change in templates * Fixed logic in ML engine * Fix login in ML_serial * Final fixes to make wavefield precond work with GPU engines * Remove notebook * More bugfixes in ML cupy/pycuda engine * Skip ML wavefield engine test --------- Co-authored-by: Benedikt Daurer --- ptypy/accelerate/base/engines/ML_serial.py | 67 ++++++++-- ptypy/accelerate/base/kernels.py | 26 ++++ .../cuda_common/ob_update2_ML_wavefield.cu | 123 ++++++++++++++++++ .../cuda_common/ob_update_ML_wavefield.cu | 73 +++++++++++ .../cuda_common/pr_update2_ML_wavefield.cu | 123 ++++++++++++++++++ .../cuda_common/pr_update_ML_wavefield.cu | 73 +++++++++++ ptypy/accelerate/cuda_cupy/engines/ML_cupy.py | 53 +++++++- ptypy/accelerate/cuda_cupy/kernels.py | 105 +++++++++++++++ .../cuda_pycuda/engines/ML_pycuda.py | 51 +++++++- ptypy/accelerate/cuda_pycuda/kernels.py | 88 +++++++++++++ ptypy/engines/ML.py | 102 +++++++++++++-- templates/engines/cupy/moonflower_ML_cupy.py | 4 +- .../cupy/moonflower_ML_cupy_wavefield.py | 58 +++++++++ .../numpy/moonflower_ML_Gaussian_wavefield.py | 56 ++++++++ .../engines/pycuda/moonflower_ML_pycuda.py | 2 +- .../pycuda/moonflower_ML_pycuda_wavefield.py | 58 +++++++++ .../serial/moonflower_ML_serial_wavefield.py | 59 +++++++++ .../base_tests/engine_test.py | 22 +++- .../base_tests/po_update_kernel_test.py | 96 +++++++++++++- .../cuda_cupy_tests/engine_tests.py | 25 +++- .../cuda_cupy_tests/po_update_kernel_test.py | 121 +++++++++++++++++ .../cuda_pycuda_tests/engine_tests.py | 25 +++- .../po_update_kernel_test.py | 121 +++++++++++++++++ 23 files changed, 1494 insertions(+), 37 deletions(-) create mode 100644 ptypy/accelerate/cuda_common/ob_update2_ML_wavefield.cu create mode 100644 ptypy/accelerate/cuda_common/ob_update_ML_wavefield.cu create mode 100644 ptypy/accelerate/cuda_common/pr_update2_ML_wavefield.cu create mode 100644 ptypy/accelerate/cuda_common/pr_update_ML_wavefield.cu create mode 100644 templates/engines/cupy/moonflower_ML_cupy_wavefield.py create mode 100644 templates/engines/numpy/moonflower_ML_Gaussian_wavefield.py create mode 100644 templates/engines/pycuda/moonflower_ML_pycuda_wavefield.py create mode 100644 templates/engines/serial/moonflower_ML_serial_wavefield.py diff --git a/ptypy/accelerate/base/engines/ML_serial.py b/ptypy/accelerate/base/engines/ML_serial.py index 1e953d26a..3618d0f9c 100644 --- a/ptypy/accelerate/base/engines/ML_serial.py +++ b/ptypy/accelerate/base/engines/ML_serial.py @@ -50,6 +50,7 @@ def __init__(self, ptycho_parent, pars=None): self.diff_info = {} self.cn2_ob_grad = 0. self.cn2_pr_grad = 0. + self.sqrt = np.sqrt def engine_initialize(self): """ @@ -162,6 +163,13 @@ def _get_smooth_gradient(self, data, sigma): def _replace_ob_grad(self): new_ob_grad = self.ob_grad_new + + # Wavefield preconditioner for the object + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + s.data /= np.sqrt(self.ob_fln.storages[name].data + + self.p.wavefield_delta_object) + # Smoothing preconditioner if self.smooth_gradient: self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) @@ -175,7 +183,8 @@ def _replace_ob_grad(self): def _replace_pr_grad(self): new_pr_grad = self.pr_grad_new - # probe support + + # Probe support if self.p.probe_update_start <= self.curiter: # Apply probe support if needed for name, s in new_pr_grad.storages.items(): @@ -183,6 +192,12 @@ def _replace_pr_grad(self): else: new_pr_grad.fill(0.) + # Wavefield preconditioner for the probe + if self.p.wavefield_precond: + for name, s in new_pr_grad.storages.items(): + s.data /= np.sqrt(self.pr_fln.storages[name].data + + self.p.wavefield_delta_probe) + norm = Cnorm2(new_pr_grad) dot = np.real(Cdot(new_pr_grad, self.pr_grad)) self.pr_grad << new_pr_grad @@ -240,19 +255,36 @@ def engine_iterate(self, num=1): self.cn2_pr_grad = cn2_new_pr_grad dt = self.ptycho.FType + # 3. Next conjugate self.ob_h *= dt(bt / self.tmin) - - # Smoothing preconditioner - if self.smooth_gradient: + # Wavefield preconditioner for the object (with and without smoothing preconditioner) + if self.p.wavefield_precond: for name, s in self.ob_h.storages.items(): - s.data[:] -= self._get_smooth_gradient(self.ob_grad.storages[name].data, self.smooth_gradient.sigma) + if self.smooth_gradient: + s.data[:] -= self._get_smooth_gradient(self.ob_grad.storages[name].data + / self.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + , self.smooth_gradient.sigma) + else: + s.data[:] -= (self.ob_grad.storages[name].data + / self.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object)) else: - self.ob_h -= self.ob_grad + # Smoothing preconditioner for the object + if self.smooth_gradient: + for name, s in self.ob_h.storages.items(): + s.data[:] -= self._get_smooth_gradient(self.ob_grad.storages[name].data, self.smooth_gradient.sigma) + else: + self.ob_h -= self.ob_grad self.pr_h *= dt(bt / self.tmin) self.pr_grad *= dt(self.scale_p_o) - self.pr_h -= self.pr_grad + # Wavefield preconditioner for the probe + if self.p.wavefield_precond: + for name, s in self.pr_h.storages.items(): + s.data[:] -= (self.pr_grad.storages[name].data + / self.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe)) + else: + self.pr_h -= self.pr_grad # In principle, the way things are now programmed this part # could be iterated over in a real Newton-Raphson style. @@ -416,6 +448,11 @@ def new_grad(self): pr_grad = self.engine.pr_grad_new ob_grad << 0. pr_grad << 0. + if self.engine.p.wavefield_precond: + ob_fln = self.engine.ob_fln + pr_fln = self.engine.pr_fln + ob_fln << 0. + pr_fln << 0. # We need an array for MPI LL = np.array([0.]) @@ -450,6 +487,11 @@ def new_grad(self): prg = pr_grad.S[pID].data I = prep.I + # local references for wavefield precond + if self.engine.p.wavefield_precond: + obf = ob_fln.S[oID].data + prf = pr_fln.S[pID].data + # make propagated exit (to buffer) AWK.build_aux_no_ex(aux, addr, ob, pr, add=False) @@ -465,8 +507,12 @@ def new_grad(self): GDK.error_reduce(addr, err_phot) aux[:] = BW(aux) - POK.ob_update_ML(addr, obg, pr, aux) - POK.pr_update_ML(addr, prg, ob, aux) + if self.engine.p.wavefield_precond: + POK.ob_update_ML_wavefield(addr, obg, obf, pr, aux) + POK.pr_update_ML_wavefield(addr, prg, prf, ob, aux) + else: + POK.ob_update_ML(addr, obg, pr, aux) + POK.pr_update_ML(addr, prg, ob, aux) for dID, prep in self.engine.diff_info.items(): err_phot = prep.err_phot @@ -480,6 +526,9 @@ def new_grad(self): # MPI reduction of gradients ob_grad.allreduce() pr_grad.allreduce() + if self.engine.p.wavefield_precond: + ob_fln.allreduce() + pr_fln.allreduce() parallel.allreduce(LL) # Object regularizer diff --git a/ptypy/accelerate/base/kernels.py b/ptypy/accelerate/base/kernels.py index c5343f940..22eb37747 100644 --- a/ptypy/accelerate/base/kernels.py +++ b/ptypy/accelerate/base/kernels.py @@ -608,6 +608,32 @@ def pr_update_ML(self, addr, pr, ob, ex, fac=2.0): ex[exc[0], exc[1]:exc[1] + rows, exc[2]:exc[2] + cols] * fac return + def ob_update_ML_wavefield(self, addr, ob, obf, pr, ex, fac=2.0): + + sh = addr.shape + flat_addr = addr.reshape(sh[0] * sh[1], sh[2], sh[3]) + rows, cols = ex.shape[-2:] + for ind, (prc, obc, exc, mac, dic) in enumerate(flat_addr): + ob[obc[0], obc[1]:obc[1] + rows, obc[2]:obc[2] + cols] += \ + pr[prc[0], prc[1]:prc[1] + rows, prc[2]:prc[2] + cols].conj() * \ + ex[exc[0], exc[1]:exc[1] + rows, exc[2]:exc[2] + cols] * fac + obf[obc[0], obc[1]:obc[1] + rows, obc[2]:obc[2] + cols] += \ + abs2(pr[prc[0], prc[1]:prc[1] + rows, prc[2]:prc[2] + cols]) + return + + def pr_update_ML_wavefield(self, addr, pr, prf, ob, ex, fac=2.0): + + sh = addr.shape + flat_addr = addr.reshape(sh[0] * sh[1], sh[2], sh[3]) + rows, cols = ex.shape[-2:] + for ind, (prc, obc, exc, mac, dic) in enumerate(flat_addr): + pr[prc[0], prc[1]:prc[1] + rows, prc[2]:prc[2] + cols] += \ + ob[obc[0], obc[1]:obc[1] + rows, obc[2]:obc[2] + cols].conj() * \ + ex[exc[0], exc[1]:exc[1] + rows, exc[2]:exc[2] + cols] * fac + prf[prc[0], prc[1]:prc[1] + rows, prc[2]:prc[2] + cols] += \ + abs2(ob[obc[0], obc[1]:obc[1] + rows, obc[2]:obc[2] + cols]) + return + def ob_update_local(self, addr, ob, pr, ex, aux, prn, a=0., b=1.): sh = addr.shape flat_addr = addr.reshape(sh[0] * sh[1], sh[2], sh[3]) diff --git a/ptypy/accelerate/cuda_common/ob_update2_ML_wavefield.cu b/ptypy/accelerate/cuda_common/ob_update2_ML_wavefield.cu new file mode 100644 index 000000000..649ce4a89 --- /dev/null +++ b/ptypy/accelerate/cuda_common/ob_update2_ML_wavefield.cu @@ -0,0 +1,123 @@ +/** ob_update2_ML_wavefield. + * + * Data types: + * - IN_TYPE: the data type for the inputs (float or double) + * - OUT_TYPE: the data type for the outputs (float or double) + * - MATH_TYPE: the data type used for computation + * - ACC_TYPE: accumulator for the ob field + * + * NOTE: This version of ob_update goes over all tiles that need to be accumulated + * in a single thread block to avoid global atomic additions (as in ob_update_ML_wavefield.cu). + * This requires a local array of NUM_MODES size to store the local updates. + * GPU registers per thread are limited (255 32bit registers on V100), + * and at some point the registers will spill into shared or global memory + * and the kernel will get considerably slower. + */ + +#include "common.cuh" + +#define pr_dlayer(k) addr[(k)] +#define ex_dlayer(k) addr[6 * num_pods + (k)] +#define obj_dlayer(k) addr[3 * num_pods + (k)] +#define obj_roi_row(k) addr[4 * num_pods + (k)] +#define obj_roi_column(k) addr[5 * num_pods + (k)] + +extern "C" __global__ void ob_update2_ML_wavefield(int pr_sh, + int ob_modes, + int num_pods, + int ob_sh_rows, + int ob_sh_cols, + int pr_modes, + complex* ob_g, + const complex* __restrict__ pr_g, + const complex* __restrict__ ex_g, + OUT_TYPE* ob_f, + const int* addr, + IN_TYPE fac_) +{ + int y = blockIdx.y * BDIM_Y + threadIdx.y; + int dy = ob_sh_rows; + int z = blockIdx.x * BDIM_X + threadIdx.x; + int dz = ob_sh_cols; + MATH_TYPE fac = fac_; + complex ob[NUM_MODES]; + ACC_TYPE of[NUM_MODES]; + + int txy = threadIdx.y * BDIM_X + threadIdx.x; + assert(ob_modes <= NUM_MODES); + + if (y < dy && z < dz) + { +#pragma unroll + for (int i = 0; i < NUM_MODES; ++i) + { + auto idx = i * dy * dz + y * dz + z; + assert(idx < ob_modes * ob_sh_rows * ob_sh_cols); + ob[i] = ob_g[idx]; + of[i] = ob_f[idx]; + } + } + + __shared__ int addresses[BDIM_X * BDIM_Y * 5]; + + for (int p = 0; p < num_pods; p += BDIM_X * BDIM_Y) + { + int mi = BDIM_X * BDIM_Y; + if (mi > num_pods - p) + mi = num_pods - p; + + if (p > 0) + __syncthreads(); + + if (txy < mi) + { + assert(p + txy < num_pods); + assert(txy < BDIM_X * BDIM_Y); + addresses[txy * 5 + 0] = pr_dlayer(p + txy); + addresses[txy * 5 + 1] = ex_dlayer(p + txy); + addresses[txy * 5 + 2] = obj_dlayer(p + txy); + assert(obj_dlayer(p + txy) < NUM_MODES); + assert(addresses[txy * 5 + 2] < NUM_MODES); + addresses[txy * 5 + 3] = obj_roi_row(p + txy); + addresses[txy * 5 + 4] = obj_roi_column(p + txy); + } + + __syncthreads(); + + if (y >= dy || z >= dz) + continue; + +#pragma unroll 4 + for (int i = 0; i < mi; ++i) + { + int* ad = addresses + i * 5; + int v1 = y - ad[3]; + int v2 = z - ad[4]; + if (v1 >= 0 && v1 < pr_sh && v2 >= 0 && v2 < pr_sh) + { + auto pridx = ad[0] * pr_sh * pr_sh + v1 * pr_sh + v2; + assert(pridx < pr_modes * pr_sh * pr_sh); + complex pr = pr_g[pridx]; + int idx = ad[2]; + assert(idx < NUM_MODES); + auto cpr = conj(pr); + auto exidx = ad[1] * pr_sh * pr_sh + v1 * pr_sh + v2; + complex ex_val = ex_g[exidx]; + complex add_val = cpr * ex_val * fac; + ob[idx] += add_val; + complex abs2_val = cpr * pr; + ACC_TYPE add_val2 = abs2_val.real(); + of[idx] += add_val2; + } + } + } + + if (y < dy && z < dz) + { + for (int i = 0; i < NUM_MODES; ++i) + { + ob_g[i * dy * dz + y * dz + z] = ob[i]; + ob_f[i * dy * dz + y * dz + z] = of[i]; + } + } +} diff --git a/ptypy/accelerate/cuda_common/ob_update_ML_wavefield.cu b/ptypy/accelerate/cuda_common/ob_update_ML_wavefield.cu new file mode 100644 index 000000000..bab2fd620 --- /dev/null +++ b/ptypy/accelerate/cuda_common/ob_update_ML_wavefield.cu @@ -0,0 +1,73 @@ +/** ob_update_ML_wavefield. + * + * Data types: + * - IN_TYPE: the data type for the inputs (float or double) + * - OUT_TYPE: the data type for the outputs (float or double) + * - MATH_TYPE: the data type used for computation + */ + +#include "common.cuh" + +template +__device__ inline void atomicAdd(complex* x, const complex& y) +{ + auto xf = reinterpret_cast(x); + atomicAdd(xf, y.real()); + atomicAdd(xf + 1, y.imag()); +} + +extern "C" +{ + __global__ void ob_update_ML_wavefield(const complex* __restrict__ exit_wave, + int A, + int B, + int C, + const complex* __restrict__ probe, + int D, + int E, + int F, + complex* obj, + int G, + int H, + int I, + OUT_TYPE* obj_fln, + const int* __restrict__ addr, + IN_TYPE fac_) + { + const int bid = blockIdx.x; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int addr_stride = 15; + MATH_TYPE fac = fac_; + + const int* oa = addr + 3 + bid * addr_stride; + const int* pa = addr + bid * addr_stride; + const int* ea = addr + 6 + bid * addr_stride; + + probe += pa[0] * E * F + pa[1] * F + pa[2]; + obj += oa[0] * H * I + oa[1] * I + oa[2]; + obj_fln += oa[0] * H * I + oa[1] * I + oa[2]; + + assert(oa[0] * H * I + oa[1] * I + oa[2] + (B - 1) * I + C - 1 < G * H * I); + + exit_wave += ea[0] * B * C; + + for (int b = ty; b < B; b += blockDim.y) + { + for (int c = tx; c < C; c += blockDim.x) + { + complex probe_val = probe[b * F + c]; + complex exit_val = exit_wave[b * C + c]; + complex add_val_m = conj(probe_val) * exit_val * fac; + complex add_val = add_val_m; + + atomicAdd(&obj[b * I + c], add_val); + + complex abs2_val = conj(probe_val) * probe_val; + OUT_TYPE add_val2 = abs2_val.real(); + + atomicAdd(&obj_fln[b * I + c], add_val2); + } + } + } +} diff --git a/ptypy/accelerate/cuda_common/pr_update2_ML_wavefield.cu b/ptypy/accelerate/cuda_common/pr_update2_ML_wavefield.cu new file mode 100644 index 000000000..567d928be --- /dev/null +++ b/ptypy/accelerate/cuda_common/pr_update2_ML_wavefield.cu @@ -0,0 +1,123 @@ +/** pr_update2_ML_wavefield. + * + * Data types: + * - IN_TYPE: the data type for the inputs (float or double) + * - OUT_TYPE: the data type for the outputs (float or double) + * - MATH_TYPE: the data type used for computation + * - ACC_TYPE: accumulator type for local pr array + * + * NOTE: This version of pr_update goes over all tiles that need to be accumulated + * in a single thread block to avoid global atomic additions (as in pr_update_ML_wavefield.cu). + * This requires a local array of NUM_MODES size to store the local updates. + * GPU registers per thread are limited (255 32bit registers on V100), + * and at some point the registers will spill into shared or global memory + * and the kernel will get considerably slower. + */ + +#include "common.cuh" + +#define pr_dlayer(k) addr[(k)] +#define ex_dlayer(k) addr[6 * num_pods + (k)] +#define obj_dlayer(k) addr[3 * num_pods + (k)] +#define obj_roi_row(k) addr[4 * num_pods + (k)] +#define obj_roi_column(k) addr[5 * num_pods + (k)] + +extern "C" __global__ void pr_update2_ML_wavefield(int pr_sh, + int ob_sh_row, + int ob_sh_col, + int pr_modes, + int ob_modes, + int num_pods, + complex* pr_g, + const complex* __restrict__ ob_g, + const complex* __restrict__ ex_g, + OUT_TYPE* pr_f, + const int* addr, + IN_TYPE fac_) +{ + int y = blockIdx.y * BDIM_Y + threadIdx.y; + int dy = pr_sh; + int z = blockIdx.x * BDIM_X + threadIdx.x; + int dz = pr_sh; + MATH_TYPE fac = fac_; + complex pr[NUM_MODES]; + ACC_TYPE pf[NUM_MODES]; + + int txy = threadIdx.y * BDIM_X + threadIdx.x; + assert(pr_modes <= NUM_MODES); + + if (y < pr_sh && z < pr_sh) + { +#pragma unroll + for (int i = 0; i < NUM_MODES; ++i) + { + auto idx = i * dy * dz + y * dz + z; + assert(idx < pr_modes * pr_sh * pr_sh); + pr[i] = pr_g[idx]; + pf[i] = pr_f[idx]; + } + } + + __shared__ int addresses[BDIM_X * BDIM_Y * 5]; + + for (int p = 0; p < num_pods; p += BDIM_X * BDIM_Y) + { + int mi = BDIM_X * BDIM_Y; + if (mi > num_pods - p) + mi = num_pods - p; + + if (p > 0) + __syncthreads(); + + if (txy < mi) + { + assert(p + txy < num_pods); + assert(txy < BDIM_X * BDIM_Y); + addresses[txy * 5 + 0] = pr_dlayer(p + txy); + addresses[txy * 5 + 1] = ex_dlayer(p + txy); + addresses[txy * 5 + 2] = obj_dlayer(p + txy); + assert(obj_dlayer(p + txy) < NUM_MODES); + assert(addresses[txy * 5 + 2] < NUM_MODES); + addresses[txy * 5 + 3] = obj_roi_row(p + txy); + addresses[txy * 5 + 4] = obj_roi_column(p + txy); + } + + __syncthreads(); + + if (y >= pr_sh || z >= pr_sh) + continue; + +#pragma unroll 4 + for (int i = 0; i < mi; ++i) + { + int* ad = addresses + i * 5; + int v1 = y + ad[3]; + int v2 = z + ad[4]; + if (v1 >= 0 && v1 < ob_sh_row && v2 >= 0 && v2 < ob_sh_col) + { + auto obidx = ad[2] * ob_sh_row * ob_sh_col + v1 * ob_sh_col + v2; + assert(obidx < ob_modes * ob_sh_row * ob_sh_col); + complex ob = ob_g[obidx]; + int idx = ad[0]; + assert(idx < NUM_MODES); + auto cob = conj(ob); + auto exidx = ad[1] * pr_sh * pr_sh + y * pr_sh + z; + complex ex_val = ex_g[exidx]; + complex add_val = cob * ex_val * fac; + pr[idx] += add_val; + complex abs2_val = cob * ob; + ACC_TYPE add_val2 = abs2_val.real(); + pf[idx] += add_val2; + } + } + } + + if (y < pr_sh && z < pr_sh) + { + for (int i = 0; i < NUM_MODES; ++i) + { + pr_g[i * dy * dz + y * dz + z] = pr[i]; + pr_f[i * dy * dz + y * dz + z] = pf[i]; + } + } +} diff --git a/ptypy/accelerate/cuda_common/pr_update_ML_wavefield.cu b/ptypy/accelerate/cuda_common/pr_update_ML_wavefield.cu new file mode 100644 index 000000000..de236fc7c --- /dev/null +++ b/ptypy/accelerate/cuda_common/pr_update_ML_wavefield.cu @@ -0,0 +1,73 @@ +#/** pr_update_ML_wavefield. + * + * Data types: + * - IN_TYPE: the data type for the inputs (float or double) + * - OUT_TYPE: the data type for the outputs (float or double) + * - MATH_TYPE: the data type used for computation + */ + +#include "common.cuh" + +template +__device__ inline void atomicAdd(complex* x, const complex& y) +{ + auto xf = reinterpret_cast(x); + atomicAdd(xf, y.real()); + atomicAdd(xf + 1, y.imag()); +} + +extern "C" +{ + __global__ void pr_update_ML_wavefield(const complex* __restrict__ exit_wave, + int A, + int B, + int C, + complex* probe, + int D, + int E, + int F, + const complex* __restrict__ obj, + int G, + int H, + int I, + OUT_TYPE* probe_fln, + const int* __restrict__ addr, + IN_TYPE fac_) + { + const int bid = blockIdx.x; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int addr_stride = 15; + MATH_TYPE fac = fac_; + + const int* oa = addr + 3 + bid * addr_stride; + const int* pa = addr + bid * addr_stride; + const int* ea = addr + 6 + bid * addr_stride; + + probe += pa[0] * E * F + pa[1] * F + pa[2]; + probe_fln += pa[0] * E * F + pa[1] * F + pa[2]; + obj += oa[0] * H * I + oa[1] * I + oa[2]; + + assert(oa[0] * H * I + oa[1] * I + oa[2] + (B - 1) * I + C - 1 < G * H * I); + + exit_wave += ea[0] * B * C; + + for (int b = ty; b < B; b += blockDim.y) + { + for (int c = tx; c < C; c += blockDim.x) + { + complex obj_val = obj[b * I + c]; + complex exit_val = exit_wave[b * C + c]; + complex add_val_m = conj(obj_val) * exit_val * fac; + complex add_val = add_val_m; + + atomicAdd(&probe[b * F + c], add_val); + + complex abs2_val = conj(obj_val) * obj_val; + OUT_TYPE add_val2 = abs2_val.real(); + + atomicAdd(&probe_fln[b * F + c], add_val2); + } + } + } +} diff --git a/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py b/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py index caa0a192d..5f35fbd67 100644 --- a/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py +++ b/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py @@ -69,6 +69,7 @@ def __init__(self, ptycho_parent, pars=None): Maximum likelihood reconstruction engine. """ super().__init__(ptycho_parent, pars) + self.sqrt = cp.sqrt def engine_initialize(self): """ @@ -280,6 +281,13 @@ def _get_smooth_gradient(self, data, sigma): def _replace_ob_grad(self): new_ob_grad = self.ob_grad_new + + # Wavefield preconditioner for the object + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + s.gpu /= cp.sqrt(self.ob_fln.storages[name].gpu + + self.p.wavefield_delta_object) + # Smoothing preconditioner if self.smooth_gradient: self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) @@ -291,7 +299,8 @@ def _replace_ob_grad(self): def _replace_pr_grad(self): new_pr_grad = self.pr_grad_new - # probe support + + # Probe support if self.p.probe_update_start <= self.curiter: # Apply probe support if needed for name, s in new_pr_grad.storages.items(): @@ -299,6 +308,12 @@ def _replace_pr_grad(self): else: new_pr_grad.fill(0.) + # Wavefield preconditioner for the probe + if self.p.wavefield_precond: + for name, s in new_pr_grad.storages.items(): + s.gpu /= cp.sqrt(self.pr_fln.storages[name].gpu + + self.p.wavefield_delta_probe) + return self._replace_grad(self.pr_grad, new_pr_grad) def _replace_grad(self, grad, new_grad): @@ -318,7 +333,7 @@ def engine_iterate(self, num=1): return err def position_update(self): - """ + """ Position refinement """ if not self.do_position_refinement or (not self.curiter): @@ -503,6 +518,12 @@ def new_grad(self): qu_htod = self.engine.qu_htod queue = self.engine.queue + if self.engine.p.wavefield_precond: + ob_fln = self.engine.ob_fln + pr_fln = self.engine.pr_fln + ob_fln << 0. + pr_fln << 0. + self.engine._set_pr_ob_ref_for_data('gpu') ob_grad << 0. pr_grad << 0. @@ -536,6 +557,9 @@ def new_grad(self): obg = ob_grad.S[oID].data pr = self.engine.pr.S[pID].data prg = pr_grad.S[pID].data + if self.engine.p.wavefield_precond: + obf = ob_fln.S[oID].data + prf = pr_fln.S[pID].data # Schedule w & I to device ev_w, w, data_w = self.engine.w_data.to_gpu( @@ -564,11 +588,17 @@ def new_grad(self): use_atomics = self.p.object_update_cuda_atomics addr = prep.addr_gpu if use_atomics else prep.addr2_gpu - POK.ob_update_ML(addr, obg, pr, aux, atomics=use_atomics) + if self.engine.p.wavefield_precond: + POK.ob_update_ML_wavefield(addr, obg, obf, pr, aux, atomics=use_atomics) + else: + POK.ob_update_ML(addr, obg, pr, aux, atomics=use_atomics) use_atomics = self.p.probe_update_cuda_atomics addr = prep.addr_gpu if use_atomics else prep.addr2_gpu - POK.pr_update_ML(addr, prg, ob, aux, atomics=use_atomics) + if self.engine.p.wavefield_precond: + POK.pr_update_ML_wavefield(addr, prg, prf, ob, aux, atomics=use_atomics) + else: + POK.pr_update_ML(addr, prg, ob, aux, atomics=use_atomics) queue.synchronize() self.engine.dID_list.reverse() @@ -591,10 +621,18 @@ def new_grad(self): s.gpu.get(out=s.cpu) for s in pr_grad.S.values(): s.gpu.get(out=s.cpu) + if self.engine.p.wavefield_precond: + for s in ob_fln.S.values(): + s.gpu.get(out=s.cpu) + for s in pr_fln.S.values(): + s.gpu.get(out=s.cpu) self.engine._set_pr_ob_ref_for_data('cpu') ob_grad.allreduce() pr_grad.allreduce() + if self.engine.p.wavefield_precond: + ob_fln.allreduce() + pr_fln.allreduce() parallel.allreduce(LL) # HtoD cause we continue on gpu @@ -602,6 +640,11 @@ def new_grad(self): s.gpu.set(s.cpu) for s in pr_grad.S.values(): s.gpu.set(s.cpu) + if self.engine.p.wavefield_precond: + for s in ob_fln.S.values(): + s.gpu.set(s.cpu) + for s in pr_fln.S.values(): + s.gpu.set(s.cpu) self.engine._set_pr_ob_ref_for_data('gpu') # Object regularizer @@ -746,7 +789,7 @@ def delxf(x, axis=-1): self.dot = lambda x, y: self.AUK.dot(x, y).get().item() self._grad_reg_kernel = cp.ElementwiseKernel( - "float32 fac, complex64 py, complex64 px, complex64 my, complex64 mx", + "float32 fac, complex64 py, complex64 px, complex64 my, complex64 mx", "complex64 out", "out = (px+py-my-mx) * fac", "grad_reg", diff --git a/ptypy/accelerate/cuda_cupy/kernels.py b/ptypy/accelerate/cuda_cupy/kernels.py index b743e9b08..07cfe8a42 100644 --- a/ptypy/accelerate/cuda_cupy/kernels.py +++ b/ptypy/accelerate/cuda_cupy/kernels.py @@ -909,6 +909,18 @@ def __init__(self, queue_thread=None, 'MATH_TYPE': self.math_type }) self.pr_update2_ML_cuda = None + self.ob_update_ML_wavefield_cuda = load_kernel("ob_update_ML_wavefield", { + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type + }) + self.ob_update2_ML_wavefield_cuda = None + self.pr_update_ML_wavefield_cuda = load_kernel("pr_update_ML_wavefield", { + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type + }) + self.pr_update2_ML_wavefield_cuda = None self.ob_update_local_cuda = load_kernel("ob_update_local", { 'IN_TYPE': 'float', 'OUT_TYPE': 'float', @@ -1096,6 +1108,54 @@ def ob_update_ML(self, addr, ob, pr, ex, fac=2.0, atomics=True): np.float32( fac) if ex.dtype == np.complex64 else np.float64(fac))) + def ob_update_ML_wavefield(self, addr, ob, of, pr, ex, fac=2.0, atomics=True): + obsh = [np.int32(ax) for ax in ob.shape] + prsh = [np.int32(ax) for ax in pr.shape] + exsh = [np.int32(ax) for ax in ex.shape] + + if self.queue is not None: + self.queue.use() + if atomics: + if addr.shape[3] != 3 or addr.shape[2] != 5: + raise ValueError( + 'Address not in required shape for tiled ob_update') + + num_pods = np.int32(addr.shape[0] * addr.shape[1]) + self.ob_update_ML_wavefield_cuda(grid=(int(num_pods), 1, 1), + block=(32, 32, 1), + args=(ex, num_pods, exsh[1], exsh[2], + pr, prsh[0], prsh[1], prsh[2], + ob, obsh[0], obsh[1], obsh[2], + of, + addr, + np.float32( + fac) if ex.dtype == np.complex64 else np.float64(fac))) + else: + if addr.shape[0] != 5 or addr.shape[1] != 3: + raise ValueError( + 'Address not in required shape for tiled ob_update') + + num_pods = np.int32(addr.shape[2] * addr.shape[3]) + if not self.ob_update2_ML_wavefield_cuda: + self.ob_update2_ML_wavefield_cuda = load_kernel("ob_update2_ML_wavefield", { + "NUM_MODES": obsh[0], + "BDIM_X": 16, + "BDIM_Y": 16, + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type, + 'ACC_TYPE': self.accumulator_type + }) + grid = [int((x+15)//16) for x in ob.shape[-2:]] + grid = (grid[1], grid[0], int(1)) + self.ob_update2_ML_wavefield_cuda(grid=grid, + block=(16, 16, 1), + args=(prsh[-1], obsh[0], num_pods, + obsh[-2], obsh[-1], prsh[0], + ob, pr, ex, of, addr, + np.float32( + fac) if ex.dtype == np.complex64 else np.float64(fac))) + def pr_update_ML(self, addr, pr, ob, ex, fac=2.0, atomics=False): obsh = [np.int32(ax) for ax in ob.shape] prsh = [np.int32(ax) for ax in pr.shape] @@ -1140,6 +1200,51 @@ def pr_update_ML(self, addr, pr, ob, ex, fac=2.0, atomics=False): np.float32( fac) if ex.dtype == np.complex64 else np.float64(fac))) + def pr_update_ML_wavefield(self, addr, pr, pf, ob, ex, fac=2.0, atomics=False): + obsh = [np.int32(ax) for ax in ob.shape] + prsh = [np.int32(ax) for ax in pr.shape] + if self.queue is not None: + self.queue.use() + if atomics: + if addr.shape[3] != 3 or addr.shape[2] != 5: + raise ValueError( + 'Address not in required shape for tiled pr_update') + num_pods = np.int32(addr.shape[0] * addr.shape[1]) + self.pr_update_ML_wavefield_cuda(grid=(int(num_pods), 1, 1), + block=(32, 32, 1), + args=(ex, num_pods, prsh[1], prsh[2], + pr, prsh[0], prsh[1], prsh[2], + ob, obsh[0], obsh[1], obsh[2], + pf, + addr, + np.float32( + fac) if ex.dtype == np.complex64 else np.float64(fac))) + else: + if addr.shape[0] != 5 or addr.shape[1] != 3: + raise ValueError( + 'Address not in required shape for tiled pr_update') + num_pods = np.int32(addr.shape[2] * addr.shape[3]) + if not self.pr_update2_ML_wavefield_cuda: + self.pr_update2_ML_wavefield_cuda = load_kernel("pr_update2_ML_wavefield", { + "NUM_MODES": prsh[0], + "BDIM_X": 16, + "BDIM_Y": 16, + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type, + 'ACC_TYPE': self.accumulator_type + }) + + grid = [int((x+15)//16) for x in pr.shape[-2:]] + grid = (grid[0], grid[1], int(1)) + self.pr_update2_ML_wavefield_cuda(grid=grid, + block=(16, 16, 1), + args=(prsh[-1], obsh[-2], obsh[-1], + prsh[0], obsh[0], num_pods, + pr, ob, ex, pf, addr, + np.float32( + fac) if ex.dtype == np.complex64 else np.float64(fac))) + def ob_update_local(self, addr, ob, pr, ex, aux, prn, a=0., b=1.): if self.queue is not None: self.queue.use() diff --git a/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py b/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py index d527d9f15..a05839b47 100644 --- a/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py +++ b/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py @@ -14,7 +14,7 @@ import numpy as np from pycuda import gpuarray import pycuda.driver as cuda -import pycuda.cumath +import pycuda.cumath as cm from ptypy.engines import register from ptypy.accelerate.base.engines.ML_serial import ML_serial, BaseModelSerial @@ -73,6 +73,7 @@ def __init__(self, ptycho_parent, pars=None): Maximum likelihood reconstruction engine. """ super().__init__(ptycho_parent, pars) + self.sqrt = cm.sqrt def engine_initialize(self): """ @@ -275,6 +276,13 @@ def _get_smooth_gradient(self, data, sigma): def _replace_ob_grad(self): new_ob_grad = self.ob_grad_new + + # Wavefield preconditioner for the object + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + s.gpu /= cm.sqrt(self.ob_fln.storages[name].gpu + + self.p.wavefield_delta_object) + # Smoothing preconditioner if self.smooth_gradient: self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) @@ -285,7 +293,8 @@ def _replace_ob_grad(self): def _replace_pr_grad(self): new_pr_grad = self.pr_grad_new - # probe support + + # Probe support if self.p.probe_update_start <= self.curiter: # Apply probe support if needed for name, s in new_pr_grad.storages.items(): @@ -293,6 +302,12 @@ def _replace_pr_grad(self): else: new_pr_grad.fill(0.) + # Wavefield preconditioner for the probe + if self.p.wavefield_precond: + for name, s in new_pr_grad.storages.items(): + s.gpu /= cm.sqrt(self.pr_fln.storages[name].gpu + + self.p.wavefield_delta_probe) + return self._replace_grad(self.pr_grad , new_pr_grad) def _replace_grad(self, grad, new_grad): @@ -485,6 +500,12 @@ def new_grad(self): qu_htod = self.engine.qu_htod queue = self.engine.queue + if self.engine.p.wavefield_precond: + ob_fln = self.engine.ob_fln + pr_fln = self.engine.pr_fln + ob_fln << 0. + pr_fln << 0. + self.engine._set_pr_ob_ref_for_data('gpu') ob_grad << 0. pr_grad << 0. @@ -518,6 +539,9 @@ def new_grad(self): obg = ob_grad.S[oID].data pr = self.engine.pr.S[pID].data prg = pr_grad.S[pID].data + if self.engine.p.wavefield_precond: + obf = ob_fln.S[oID].data + prf = pr_fln.S[pID].data # Schedule w & I to device ev_w, w, data_w = self.engine.w_data.to_gpu(prep.weights, dID, qu_htod) @@ -545,11 +569,17 @@ def new_grad(self): use_atomics = self.p.object_update_cuda_atomics addr = prep.addr_gpu if use_atomics else prep.addr2_gpu - POK.ob_update_ML(addr, obg, pr, aux, atomics=use_atomics) + if self.engine.p.wavefield_precond: + POK.ob_update_ML_wavefield(addr, obg, obf, pr, aux, atomics=use_atomics) + else: + POK.ob_update_ML(addr, obg, pr, aux, atomics=use_atomics) use_atomics = self.p.probe_update_cuda_atomics addr = prep.addr_gpu if use_atomics else prep.addr2_gpu - POK.pr_update_ML(addr, prg, ob, aux, atomics=use_atomics) + if self.engine.p.wavefield_precond: + POK.pr_update_ML_wavefield(addr, prg, prf, ob, aux, atomics=use_atomics) + else: + POK.pr_update_ML(addr, prg, ob, aux, atomics=use_atomics) queue.synchronize() self.engine.dID_list.reverse() @@ -571,10 +601,18 @@ def new_grad(self): s.gpu.get(s.cpu) for s in pr_grad.S.values(): s.gpu.get(s.cpu) + if self.engine.p.wavefield_precond: + for s in ob_fln.S.values(): + s.gpu.get(s.cpu) + for s in pr_fln.S.values(): + s.gpu.get(s.cpu) self.engine._set_pr_ob_ref_for_data('cpu') ob_grad.allreduce() pr_grad.allreduce() + if self.engine.p.wavefield_precond: + ob_fln.allreduce() + pr_fln.allreduce() parallel.allreduce(LL) # HtoD cause we continue on gpu @@ -582,6 +620,11 @@ def new_grad(self): s.gpu.set(s.cpu) for s in pr_grad.S.values(): s.gpu.set(s.cpu) + if self.engine.p.wavefield_precond: + for s in ob_fln.S.values(): + s.gpu.set(s.cpu) + for s in pr_fln.S.values(): + s.gpu.set(s.cpu) self.engine._set_pr_ob_ref_for_data('gpu') # Object regularizer diff --git a/ptypy/accelerate/cuda_pycuda/kernels.py b/ptypy/accelerate/cuda_pycuda/kernels.py index 8fcebb6ef..bab47a8a3 100644 --- a/ptypy/accelerate/cuda_pycuda/kernels.py +++ b/ptypy/accelerate/cuda_pycuda/kernels.py @@ -880,6 +880,18 @@ def __init__(self, queue_thread=None, 'MATH_TYPE': self.math_type }) self.pr_update2_ML_cuda = None + self.ob_update_ML_wavefield_cuda = load_kernel("ob_update_ML_wavefield", { + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type + }) + self.ob_update2_ML_wavefield_cuda = None + self.pr_update_ML_wavefield_cuda = load_kernel("pr_update_ML_wavefield", { + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type + }) + self.pr_update2_ML_wavefield_cuda = None self.ob_update_local_cuda = load_kernel("ob_update_local", { 'IN_TYPE': 'float', 'OUT_TYPE': 'float', @@ -1045,6 +1057,46 @@ def ob_update_ML(self, addr, ob, pr, ex, fac=2.0, atomics=True): np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), block=(16, 16, 1), grid=grid, stream=self.queue) + def ob_update_ML_wavefield(self, addr, ob, of, pr, ex, fac=2.0, atomics=True): + obsh = [np.int32(ax) for ax in ob.shape] + prsh = [np.int32(ax) for ax in pr.shape] + exsh = [np.int32(ax) for ax in ex.shape] + + if atomics: + if addr.shape[3] != 3 or addr.shape[2] != 5: + raise ValueError('Address not in required shape for tiled ob_update') + + num_pods = np.int32(addr.shape[0] * addr.shape[1]) + self.ob_update_ML_wavefield_cuda(ex, num_pods, exsh[1], exsh[2], + pr, prsh[0], prsh[1], prsh[2], + ob, obsh[0], obsh[1], obsh[2], + of, + addr, + np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), + block=(32, 32, 1), grid=(int(num_pods), 1, 1), stream=self.queue) + else: + if addr.shape[0] != 5 or addr.shape[1] != 3: + raise ValueError('Address not in required shape for tiled ob_update') + + num_pods = np.int32(addr.shape[2] * addr.shape[3]) + if not self.ob_update2_ML_wavefield_cuda: + self.ob_update2_ML_wavefield_cuda = load_kernel("ob_update2_ML_wavefield", { + "NUM_MODES": obsh[0], + "BDIM_X": 16, + "BDIM_Y": 16, + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type, + 'ACC_TYPE': self.accumulator_type + }) + grid = [int((x+15)//16) for x in ob.shape[-2:]] + grid = (grid[1], grid[0], int(1)) + self.ob_update2_ML_wavefield_cuda(prsh[-1], obsh[0], num_pods, + obsh[-2], obsh[-1], prsh[0], + ob, pr, ex, of, addr, + np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), + block=(16, 16, 1), grid=grid, stream=self.queue) + def pr_update_ML(self, addr, pr, ob, ex, fac=2.0, atomics=False): obsh = [np.int32(ax) for ax in ob.shape] prsh = [np.int32(ax) for ax in pr.shape] @@ -1081,6 +1133,42 @@ def pr_update_ML(self, addr, pr, ob, ex, fac=2.0, atomics=False): np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), block=(16, 16, 1), grid=grid, stream=self.queue) + def pr_update_ML_wavefield(self, addr, pr, pf, ob, ex, fac=2.0, atomics=False): + obsh = [np.int32(ax) for ax in ob.shape] + prsh = [np.int32(ax) for ax in pr.shape] + if atomics: + if addr.shape[3] != 3 or addr.shape[2] != 5: + raise ValueError('Address not in required shape for tiled pr_update') + num_pods = np.int32(addr.shape[0] * addr.shape[1]) + self.pr_update_ML_wavefield_cuda(ex, num_pods, prsh[1], prsh[2], + pr, prsh[0], prsh[1], prsh[2], + ob, obsh[0], obsh[1], obsh[2], + pf, + addr, + np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), + block=(32, 32, 1), grid=(int(num_pods), 1, 1), stream=self.queue) + else: + if addr.shape[0] != 5 or addr.shape[1] != 3: + raise ValueError('Address not in required shape for tiled pr_update') + num_pods = np.int32(addr.shape[2] * addr.shape[3]) + if not self.pr_update2_ML_wavefield_cuda: + self.pr_update2_ML_wavefield_cuda = load_kernel("pr_update2_ML_wavefield", { + "NUM_MODES": prsh[0], + "BDIM_X": 16, + "BDIM_Y": 16, + 'IN_TYPE': 'float', + 'OUT_TYPE': 'float', + 'MATH_TYPE': self.math_type, + 'ACC_TYPE': self.accumulator_type + }) + + grid = [int((x+15)//16) for x in pr.shape[-2:]] + grid = (grid[0], grid[1], int(1)) + self.pr_update2_ML_wavefield_cuda(prsh[-1], obsh[-2], obsh[-1], + prsh[0], obsh[0], num_pods, + pr, ob, ex, pf, addr, + np.float32(fac) if ex.dtype == np.complex64 else np.float64(fac), + block=(16, 16, 1), grid=grid, stream=self.queue) def ob_update_local(self, addr, ob, pr, ex, aux, prn, a=0., b=1.): prn_max = gpuarray.max(prn, stream=self.queue) diff --git a/ptypy/engines/ML.py b/ptypy/engines/ML.py index b5c93dfbe..8cafd78c8 100644 --- a/ptypy/engines/ML.py +++ b/ptypy/engines/ML.py @@ -106,6 +106,22 @@ class ML(PositionCorrectionEngine): help = How many coefficients to be used in the the linesearch doc = choose between the 'quadratic' approximation (default) or 'all' + [wavefield_precond] + default = False + type = bool + help = Whether to use the wavefield preconditioner + doc = This parameter can give faster convergence. + + [wavefield_delta_object] + default = 0.1 + type = float + help = Wavefield preconditioner damping constant for the object. + + [wavefield_delta_probe] + default = 0.1 + type = float + help = Wavefield preconditioner damping constant for the probe. + """ SUPPORTED_MODELS = [Full, Vanilla, Bragg3dModel, BlockVanilla, BlockFull, GradFull, BlockGradFull] @@ -137,6 +153,9 @@ def __init__(self, ptycho_parent, pars=None): # Probe gradient self.pr_grad_new = None + # Object and probe fluence maps + self.ob_fln = None + self.pr_fln = None # Other self.tmin = None @@ -172,6 +191,11 @@ def engine_initialize(self): self.pr_grad_new = self.pr.copy(self.pr.ID + '_grad_new', fill=0.) self.pr_h = self.pr.copy(self.pr.ID + '_h', fill=0.) + # Object and probe fluence maps + if self.p.wavefield_precond: + self.ob_fln = self.ob.copy(self.ob.ID + '_fln', fill=0., dtype='real') + self.pr_fln = self.pr.copy(self.pr.ID + '_fln', fill=0., dtype='real') + self.tmin = 1. # Other options @@ -230,6 +254,12 @@ def engine_iterate(self, num=1): else: new_pr_grad.fill(0.) + # Wavefield preconditioner + if self.p.wavefield_precond: + for name, s in new_ob_grad.storages.items(): + new_ob_grad.storages[name].data /= np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object) + new_pr_grad.storages[name].data /= np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe) + # Smoothing preconditioner if self.smooth_gradient: self.smooth_gradient.sigma *= (1. - self.p.smooth_gradient_decay) @@ -276,19 +306,35 @@ def engine_iterate(self, num=1): self.pr_grad << new_pr_grad dt = self.ptycho.FType + # 3. Next conjugate self.ob_h *= bt / self.tmin - - # Smoothing preconditioner - if self.smooth_gradient: + # Wavefield preconditioner for the object (with and without smoothing preconditioner) + if self.p.wavefield_precond: for name, s in self.ob_h.storages.items(): - s.data[:] -= self.smooth_gradient(self.ob_grad.storages[name].data) + if self.smooth_gradient: + s.data[:] -= self.smooth_gradient(self.ob_grad.storages[name].data + / np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object)) + else: + s.data[:] -= (self.ob_grad.storages[name].data + / np.sqrt(self.ob_fln.storages[name].data + self.p.wavefield_delta_object)) else: - self.ob_h -= self.ob_grad + # Smoothing preconditioner for the object + if self.smooth_gradient: + for name, s in self.ob_h.storages.items(): + s.data[:] -= self.smooth_gradient(self.ob_grad.storages[name].data) + else: + self.ob_h -= self.ob_grad self.pr_h *= bt / self.tmin self.pr_grad *= self.scale_p_o - self.pr_h -= self.pr_grad + # Wavefield preconditioner for the probe + if self.p.wavefield_precond: + for name, s in self.pr_h.storages.items(): + s.data[:] -= (self.pr_grad.storages[name].data + / np.sqrt(self.pr_fln.storages[name].data + self.p.wavefield_delta_probe)) + else: + self.pr_h -= self.pr_grad # In principle, the way things are now programmed this part # could be iterated over in a real Newton-Raphson style. @@ -309,9 +355,9 @@ def engine_iterate(self, num=1): self.tmin = dt(-0.5 * B[1] / B[2]) else: raise NotImplementedError("poly_line_coeffs should be 'quadratic' or 'all'") - + tc += time.time() - t2 - + self.ob_h *= self.tmin self.pr_h *= self.tmin self.ob += self.ob_h @@ -353,6 +399,11 @@ def engine_finalize(self): del self.pr_grad_new del self.ptycho.containers[self.pr_h.ID] del self.pr_h + if self.p.wavefield_precond: + del self.ptycho.containers[self.ob_fln.ID] + del self.ob_fln + del self.ptycho.containers[self.pr_fln.ID] + del self.pr_fln # Save floating intensities into runtime self.ptycho.runtime["float_intens"] = parallel.gather_dict(self.ML_model.float_intens_coeff) @@ -377,6 +428,8 @@ def __init__(self, MLengine): self.ob = self.engine.ob self.ob_grad = self.engine.ob_grad_new self.pr_grad = self.engine.pr_grad_new + self.ob_fln = self.engine.ob_fln + self.pr_fln = self.engine.pr_fln self.pr = self.engine.pr self.float_intens_coeff = {} @@ -490,6 +543,9 @@ def new_grad(self): """ self.ob_grad.fill(0.) self.pr_grad.fill(0.) + if self.p.wavefield_precond: + self.ob_fln.fill(0.) + self.pr_fln.fill(0.) # We need an array for MPI LL = np.array([0.]) @@ -531,6 +587,11 @@ def new_grad(self): self.ob_grad[pod.ob_view] += 2. * xi * pod.probe.conj() self.pr_grad[pod.pr_view] += 2. * xi * pod.object.conj() + # Compute fluence maps for object and probe + if self.p.wavefield_precond: + self.ob_fln[pod.ob_view] += u.abs2(pod.probe) + self.pr_fln[pod.pr_view] += u.abs2(pod.object) + diff_view.error = LLL error_dct[dname] = np.array([0, LLL / np.prod(DI.shape), 0]) LL += LLL @@ -538,6 +599,9 @@ def new_grad(self): # MPI reduction of gradients self.ob_grad.allreduce() self.pr_grad.allreduce() + if self.p.wavefield_precond: + self.ob_fln.allreduce() + self.pr_fln.allreduce() parallel.allreduce(LL) # Object regularizer @@ -733,6 +797,9 @@ def new_grad(self): """ self.ob_grad.fill(0.) self.pr_grad.fill(0.) + if self.p.wavefield_precond: + self.ob_fln.fill(0.) + self.pr_fln.fill(0.) # We need an array for MPI LL = np.array([0.]) @@ -774,6 +841,11 @@ def new_grad(self): self.ob_grad[pod.ob_view] += 2 * xi * pod.probe.conj() self.pr_grad[pod.pr_view] += 2 * xi * pod.object.conj() + # Compute fluence maps for object and probe + if self.p.wavefield_precond: + self.ob_fln[pod.ob_view] += u.abs2(pod.probe) + self.pr_fln[pod.pr_view] += u.abs2(pod.object) + diff_view.error = LLL error_dct[dname] = np.array([0, LLL / np.prod(DI.shape), 0]) LL += LLL @@ -781,6 +853,9 @@ def new_grad(self): # MPI reduction of gradients self.ob_grad.allreduce() self.pr_grad.allreduce() + if self.p.wavefield_precond: + self.ob_fln.allreduce() + self.pr_fln.allreduce() parallel.allreduce(LL) # Object regularizer @@ -989,6 +1064,9 @@ def new_grad(self): """ self.ob_grad.fill(0.) self.pr_grad.fill(0.) + if self.p.wavefield_precond: + self.ob_fln.fill(0.) + self.pr_fln.fill(0.) # We need an array for MPI LL = np.array([0.]) @@ -1030,6 +1108,11 @@ def new_grad(self): self.ob_grad[pod.ob_view] += 2. * xi * pod.probe.conj() self.pr_grad[pod.pr_view] += 2. * xi * pod.object.conj() + # Compute fluence maps for object and probe + if self.p.wavefield_precond: + self.ob_fln[pod.ob_view] += u.abs2(pod.probe) + self.pr_fln[pod.pr_view] += u.abs2(pod.object) + diff_view.error = LLL error_dct[dname] = np.array([0, LLL / np.prod(DA.shape), 0]) LL += LLL @@ -1037,6 +1120,9 @@ def new_grad(self): # MPI reduction of gradients self.ob_grad.allreduce() self.pr_grad.allreduce() + if self.p.wavefield_precond: + self.ob_fln.allreduce() + self.pr_fln.allreduce() parallel.allreduce(LL) # Object regularizer diff --git a/templates/engines/cupy/moonflower_ML_cupy.py b/templates/engines/cupy/moonflower_ML_cupy.py index af0427cfb..61845bdd3 100644 --- a/templates/engines/cupy/moonflower_ML_cupy.py +++ b/templates/engines/cupy/moonflower_ML_cupy.py @@ -16,7 +16,7 @@ # for verbose output p.verbose_level = "info" -p.frames_per_block = 400 +p.frames_per_block = 100 # set home path p.io = u.Param() p.io.home = "/".join([tmpdir, "ptypy"]) @@ -33,7 +33,7 @@ p.scans.MF.data= u.Param() p.scans.MF.data.name = 'MoonFlowerScan' p.scans.MF.data.shape = 128 -p.scans.MF.data.num_frames = 100 +p.scans.MF.data.num_frames = 200 p.scans.MF.data.save = None p.scans.MF.illumination = u.Param(diversity=None) diff --git a/templates/engines/cupy/moonflower_ML_cupy_wavefield.py b/templates/engines/cupy/moonflower_ML_cupy_wavefield.py new file mode 100644 index 000000000..0e2d896b2 --- /dev/null +++ b/templates/engines/cupy/moonflower_ML_cupy_wavefield.py @@ -0,0 +1,58 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="cupy") + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 100 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'ML_cupy' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/numpy/moonflower_ML_Gaussian_wavefield.py b/templates/engines/numpy/moonflower_ML_Gaussian_wavefield.py new file mode 100644 index 000000000..b5f409f68 --- /dev/null +++ b/templates/engines/numpy/moonflower_ML_Gaussian_wavefield.py @@ -0,0 +1,56 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" +#import ptypy +from ptypy.core import Ptycho +from ptypy import utils as u + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" + +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) + +# saving intermediate results +p.io.autosave = u.Param(active=False) + +# opens plotting GUI if interaction set to active) +p.io.autoplot = u.Param(active=True) +p.io.interaction = u.Param(active=True) + +# max 100 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'ML' +p.engines.engine00.ML_type = 'Gaussian' +p.engines.engine00.numiter = 300 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/pycuda/moonflower_ML_pycuda.py b/templates/engines/pycuda/moonflower_ML_pycuda.py index 2938275f7..8afcf3517 100644 --- a/templates/engines/pycuda/moonflower_ML_pycuda.py +++ b/templates/engines/pycuda/moonflower_ML_pycuda.py @@ -16,7 +16,7 @@ # for verbose output p.verbose_level = "info" -p.frames_per_block = 400 +p.frames_per_block = 100 # set home path p.io = u.Param() p.io.home = "/".join([tmpdir, "ptypy"]) diff --git a/templates/engines/pycuda/moonflower_ML_pycuda_wavefield.py b/templates/engines/pycuda/moonflower_ML_pycuda_wavefield.py new file mode 100644 index 000000000..04c0528f3 --- /dev/null +++ b/templates/engines/pycuda/moonflower_ML_pycuda_wavefield.py @@ -0,0 +1,58 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" + +from ptypy.core import Ptycho +from ptypy import utils as u +import ptypy +ptypy.load_gpu_engines(arch="pycuda") + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 100 +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'ML_pycuda' +p.engines.engine00.numiter = 300 +p.engines.engine00.numiter_contiguous = 5 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/templates/engines/serial/moonflower_ML_serial_wavefield.py b/templates/engines/serial/moonflower_ML_serial_wavefield.py new file mode 100644 index 000000000..3d2d2dc84 --- /dev/null +++ b/templates/engines/serial/moonflower_ML_serial_wavefield.py @@ -0,0 +1,59 @@ +""" +This script is a test for ptychographic reconstruction in the absence +of actual data. It uses the test Scan class +`ptypy.core.data.MoonFlowerScan` to provide "data". +""" +from ptypy.core import Ptycho +from ptypy import utils as u + +import ptypy +ptypy.load_gpu_engines(arch="serial") + +import tempfile +tmpdir = tempfile.gettempdir() + +p = u.Param() + +# for verbose output +p.verbose_level = "info" +p.frames_per_block = 100 + +# set home path +p.io = u.Param() +p.io.home = "/".join([tmpdir, "ptypy"]) +p.io.autosave = u.Param(active=False) +p.io.autoplot = u.Param(active=False) +p.io.interaction = u.Param(active=False) + +# max 200 frames (128x128px) of diffraction data +p.scans = u.Param() +p.scans.MF = u.Param() +# now you have to specify which ScanModel to use with scans.XX.name, +# just as you have to give 'name' for engines and PtyScan subclasses. +p.scans.MF.name = 'BlockFull' +p.scans.MF.data= u.Param() +p.scans.MF.data.name = 'MoonFlowerScan' +p.scans.MF.data.shape = 128 +p.scans.MF.data.num_frames = 200 +p.scans.MF.data.save = None + +p.scans.MF.illumination = u.Param(diversity=None) +p.scans.MF.coherence = u.Param(num_probe_modes=1) +# position distance in fraction of illumination frame +p.scans.MF.data.density = 0.2 +# total number of photon in empty beam +p.scans.MF.data.photons = 1e8 +# Gaussian FWHM of possible detector blurring +p.scans.MF.data.psf = 0. + +# attach a reconstrucion engine +p.engines = u.Param() +p.engines.engine00 = u.Param() +p.engines.engine00.name = 'ML_serial' +p.engines.engine00.ML_type = 'Gaussian' +p.engines.engine00.numiter = 300 +p.engines.engine00.wavefield_precond = True + +# prepare and run +if __name__ == "__main__": + P = Ptycho(p,level=5) diff --git a/test/accelerate_tests/base_tests/engine_test.py b/test/accelerate_tests/base_tests/engine_test.py index 855536993..30cb84a55 100644 --- a/test/accelerate_tests/base_tests/engine_test.py +++ b/test/accelerate_tests/base_tests/engine_test.py @@ -75,7 +75,7 @@ def check_engine_output(self, output, plotting=False, debug=False): err_msg="The object arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") - + def test_ML_serial_base(self): out = [] @@ -152,6 +152,26 @@ def test_ML_serial_smoothing_regularizer(self): scanmodel="BlockFull", autosave=False, verbose_level="critical")) self.check_engine_output(out, plotting=False, debug=False) + @pytest.mark.skip(reason="Funny behaviour with this test, most likely due to numerical instabilities, see issue #612") + def test_ML_serial_wavefield_preconditioner(self): + out = [] + for eng in ["ML", "ML_serial"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 100 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0. + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + def test_ML_serial_all(self): out = [] for eng in ["ML", "ML_serial"]: diff --git a/test/accelerate_tests/base_tests/po_update_kernel_test.py b/test/accelerate_tests/base_tests/po_update_kernel_test.py index 953c26b54..9b3ca5c1b 100644 --- a/test/accelerate_tests/base_tests/po_update_kernel_test.py +++ b/test/accelerate_tests/base_tests/po_update_kernel_test.py @@ -188,7 +188,7 @@ def test_pr_update(self): err_msg="The probe denominatorhas not been updated as expected") def test_pr_update_ML(self): - # setup + # setup addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() # test @@ -242,6 +242,100 @@ def test_ob_update_ML(self): np.testing.assert_array_equal(object_array, expected_object_array, err_msg="The object array has not been updated as expected") + def test_pr_update_ML_wavefield(self): + # setup + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + probe_wavefield = np.zeros_like(probe, dtype=FLOAT_TYPE) + + # test + POUK = PoUpdateKernel() + POUK.allocate() # this doesn't do anything, but is the call pattern. + POUK.pr_update_ML_wavefield(addr, probe, probe_wavefield, object_array, exit_wave) + + # assert + expected_probe = np.array([[[625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j]], + + [[786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(probe, expected_probe, + err_msg="The probe has not been updated as expected") + + # assert + expected_probe_wavefield = np.array( + [[[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]], + + [[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(probe_wavefield, expected_probe_wavefield, + err_msg="The probe wavefield has not been updated as expected") + + def test_ob_update_ML_wavefield(self): + # setup + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + object_array_wavefield = np.zeros_like(object_array, dtype=FLOAT_TYPE) + + # test + POUK = PoUpdateKernel() + POUK.allocate() # this doesn't do anything, but is the call pattern. + POUK.ob_update_ML_wavefield(addr, object_array, object_array_wavefield, probe, exit_wave) + + # assert + expected_object_array = np.array( + [[[29. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 77. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [125. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 173. + 1.j, 1. + 1.j], + [1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j]], + + [[44. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 92. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [140. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 188. + 4.j, 4. + 4.j], + [4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(object_array, expected_object_array, + err_msg="The object array has not been updated as expected") + + # assert + expected_object_array_wavefield = np.array( + [[[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]], + + [[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(object_array_wavefield, expected_object_array_wavefield, + err_msg="The object array wavefield has not been updated as expected") def test_pr_update_local(self): # setup diff --git a/test/accelerate_tests/cuda_cupy_tests/engine_tests.py b/test/accelerate_tests/cuda_cupy_tests/engine_tests.py index fe70b58bc..2dc64c34d 100644 --- a/test/accelerate_tests/cuda_cupy_tests/engine_tests.py +++ b/test/accelerate_tests/cuda_cupy_tests/engine_tests.py @@ -71,13 +71,13 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): RMSE_ob = (np.mean(np.abs(OBJ_ML_cupy - OBJ_ML_serial)**2)) RMSE_pr = (np.mean(np.abs(PRB_ML_cupy - PRB_ML_serial)**2)) # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, err_msg="The object arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") - + def test_ML_cupy_base(self): out = [] for eng in ["ML_serial", "ML_cupy"]: @@ -151,6 +151,25 @@ def test_ML_cupy_smoothing_regularizer(self): scanmodel="BlockFull", autosave=False, verbose_level="critical")) self.check_engine_output(out, plotting=False, debug=False) + def test_ML_cupy_wavefield_preconditioner(self): + out = [] + for eng in ["ML_serial", "ML_cupy"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 200 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0 + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + def test_ML_cupy_all(self): out = [] for eng in ["ML_serial", "ML_cupy"]: diff --git a/test/accelerate_tests/cuda_cupy_tests/po_update_kernel_test.py b/test/accelerate_tests/cuda_cupy_tests/po_update_kernel_test.py index 5b2751877..bad8da21b 100644 --- a/test/accelerate_tests/cuda_cupy_tests/po_update_kernel_test.py +++ b/test/accelerate_tests/cuda_cupy_tests/po_update_kernel_test.py @@ -648,6 +648,127 @@ def test_ob_update_ML_atomics_REGRESSION(self): def test_ob_update_ML_tiled_REGRESSION(self): self.ob_update_ML_tester(False) + def pr_update_ML_wavefield_tester(self, atomics=False): + ''' + setup + ''' + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + probe_wavefield = cp.asarray(np.zeros_like(probe, dtype=FLOAT_TYPE)) + ''' + test + ''' + POUK = PoUpdateKernel() + + POUK.allocate() # this doesn't do anything, but is the call pattern. + + if not atomics: + addr2 = np.ascontiguousarray(np.transpose(addr.get(), (2, 3, 0, 1))) + addr = cp.asarray(addr2) + + POUK.pr_update_ML_wavefield(addr, probe, probe_wavefield, object_array, exit_wave, atomics=atomics) + + expected_probe = np.array([[[625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j]], + + [[786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(probe.get(), expected_probe, + err_msg="The probe has not been updated as expected") + + expected_probe_wavefield = np.array( + [[[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]], + + [[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(probe_wavefield.get(), expected_probe_wavefield, + err_msg="The probe wavefield has not been updated as expected") + + def test_pr_update_ML_wavefield_atomics_REGRESSION(self): + self.pr_update_ML_wavefield_tester(True) + + def test_pr_update_ML_wavefield_tiled_REGRESSION(self): + self.pr_update_ML_wavefield_tester(False) + + def ob_update_ML_wavefield_tester(self, atomics=True): + ''' + setup + ''' + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + object_array_wavefield = cp.asarray(np.zeros_like(object_array, dtype=FLOAT_TYPE)) + ''' + test + ''' + POUK = PoUpdateKernel() + + POUK.allocate() # this doesn't do anything, but is the call pattern. + + if not atomics: + addr2 = np.ascontiguousarray(np.transpose(addr.get(), (2, 3, 0, 1))) + addr = cp.asarray(addr2) + + POUK.ob_update_ML_wavefield(addr, object_array, object_array_wavefield, probe, exit_wave, atomics=atomics) + + expected_object_array = np.array( + [[[29. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 77. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [125. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 173. + 1.j, 1. + 1.j], + [1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j]], + + [[44. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 92. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [140. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 188. + 4.j, 4. + 4.j], + [4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(object_array.get(), expected_object_array, + err_msg="The object array has not been updated as expected") + + expected_object_array_wavefield = np.array( + [[[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]], + + [[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(object_array_wavefield.get(), expected_object_array_wavefield, + err_msg="The object array wavefield has not been updated as expected") + + def test_ob_update_ML_wavefield_atomics_REGRESSION(self): + self.ob_update_ML_wavefield_tester(True) + + def test_ob_update_ML_wavefield_tiled_REGRESSION(self): + self.ob_update_ML_wavefield_tester(False) + def test_ob_update_local_UNITY(self): ''' setup diff --git a/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py b/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py index 20b950c61..bf1223c2c 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py +++ b/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py @@ -71,13 +71,13 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): RMSE_ob = (np.mean(np.abs(OBJ_ML_pycuda - OBJ_ML_serial)**2)) RMSE_pr = (np.mean(np.abs(PRB_ML_pycuda - PRB_ML_serial)**2)) # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, err_msg="The object arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") - + def test_ML_pycuda_base(self): out = [] for eng in ["ML_serial", "ML_pycuda"]: @@ -151,6 +151,25 @@ def test_ML_pycuda_smoothing_regularizer(self): scanmodel="BlockFull", autosave=False, verbose_level="critical")) self.check_engine_output(out, plotting=False, debug=False) + def test_ML_pycuda_wavefield_preconditioner(self): + out = [] + for eng in ["ML_serial", "ML_pycuda"]: + engine_params = u.Param() + engine_params.name = eng + engine_params.numiter = 200 + engine_params.floating_intensities = False + engine_params.reg_del2 = False + engine_params.reg_del2_amplitude = 1. + engine_params.smooth_gradient = 0 + engine_params.smooth_gradient_decay = 0. + engine_params.scale_precond = False + engine_params.wavefield_precond = True + engine_params.wavefield_delta_object = 0.1 + engine_params.wavefield_delta_probe = 0.1 + out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, + scanmodel="BlockFull", autosave=False, verbose_level="critical")) + self.check_engine_output(out, plotting=False, debug=False) + def test_ML_pycuda_all(self): out = [] for eng in ["ML_serial", "ML_pycuda"]: diff --git a/test/accelerate_tests/cuda_pycuda_tests/po_update_kernel_test.py b/test/accelerate_tests/cuda_pycuda_tests/po_update_kernel_test.py index 5fc1112cd..5a0167d01 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/po_update_kernel_test.py +++ b/test/accelerate_tests/cuda_pycuda_tests/po_update_kernel_test.py @@ -648,6 +648,127 @@ def test_ob_update_ML_atomics_REGRESSION(self): def test_ob_update_ML_tiled_REGRESSION(self): self.ob_update_ML_tester(False) + def pr_update_ML_wavefield_tester(self, atomics=False): + ''' + setup + ''' + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + probe_wavefield = gpuarray.to_gpu(np.zeros_like(probe, dtype=FLOAT_TYPE)) + ''' + test + ''' + POUK = PoUpdateKernel() + + POUK.allocate() # this doesn't do anything, but is the call pattern. + + if not atomics: + addr2 = np.ascontiguousarray(np.transpose(addr.get(), (2, 3, 0, 1))) + addr = gpuarray.to_gpu(addr2) + + POUK.pr_update_ML_wavefield(addr, probe, probe_wavefield, object_array, exit_wave, atomics=atomics) + + expected_probe = np.array([[[625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j], + [625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j, 625. + 1.j]], + + [[786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j], + [786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j, 786. + 2.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(probe.get(), expected_probe, + err_msg="The probe has not been updated as expected") + + expected_probe_wavefield = np.array( + [[[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]], + + [[136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.], + [136., 136., 136., 136., 136.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(probe_wavefield.get(), expected_probe_wavefield, + err_msg="The probe wavefield has not been updated as expected") + + def test_pr_update_ML_wavefield_atomics_REGRESSION(self): + self.pr_update_ML_wavefield_tester(True) + + def test_pr_update_ML_wavefield_tiled_REGRESSION(self): + self.pr_update_ML_wavefield_tester(False) + + def ob_update_ML_wavefield_tester(self, atomics=True): + ''' + setup + ''' + addr, object_array, object_array_denominator, probe, exit_wave, probe_denominator = self.prepare_arrays() + object_array_wavefield = gpuarray.to_gpu(np.zeros_like(object_array, dtype=FLOAT_TYPE)) + ''' + test + ''' + POUK = PoUpdateKernel() + + POUK.allocate() # this doesn't do anything, but is the call pattern. + + if not atomics: + addr2 = np.ascontiguousarray(np.transpose(addr.get(), (2, 3, 0, 1))) + addr = gpuarray.to_gpu(addr2) + + POUK.ob_update_ML_wavefield(addr, object_array, object_array_wavefield, probe, exit_wave, atomics=atomics) + + expected_object_array = np.array( + [[[29. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 105. + 1.j, 77. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [153. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 401. + 1.j, 249. + 1.j, 1. + 1.j], + [125. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 297. + 1.j, 173. + 1.j, 1. + 1.j], + [1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j, 1. + 1.j]], + + [[44. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 132. + 4.j, 92. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [180. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 452. + 4.j, 276. + 4.j, 4. + 4.j], + [140. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 324. + 4.j, 188. + 4.j, 4. + 4.j], + [4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j, 4. + 4.j]]], + dtype=COMPLEX_TYPE) + np.testing.assert_array_equal(object_array.get(), expected_object_array, + err_msg="The object array has not been updated as expected") + + expected_object_array_wavefield = np.array( + [[[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]], + + [[10., 20., 20., 20., 20., 10., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [20., 40., 40., 40., 40., 20., 0.], + [10., 20., 20., 20., 20., 10., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]]], + dtype=FLOAT_TYPE) + np.testing.assert_array_equal(object_array_wavefield.get(), expected_object_array_wavefield, + err_msg="The object array wavefield has not been updated as expected") + + def test_ob_update_ML_wavefield_atomics_REGRESSION(self): + self.ob_update_ML_wavefield_tester(True) + + def test_ob_update_ML_wavefield_tiled_REGRESSION(self): + self.ob_update_ML_wavefield_tester(False) + def test_ob_update_local_UNITY(self): ''' setup From 3c4a686e68367b5c35c363c3f02a85a278b4489a Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Tue, 29 Apr 2025 16:49:03 +0100 Subject: [PATCH 10/12] Fixed issues with ML engine tests by adding normalisation (#613) --- .../base_tests/engine_test.py | 27 +++++++++++-------- .../{engine_tests.py => engine_test.py} | 17 +++++++----- .../{engine_tests.py => engine_test.py} | 19 ++++++++----- 3 files changed, 39 insertions(+), 24 deletions(-) rename test/accelerate_tests/cuda_cupy_tests/{engine_tests.py => engine_test.py} (94%) rename test/accelerate_tests/cuda_pycuda_tests/{engine_tests.py => engine_test.py} (93%) diff --git a/test/accelerate_tests/base_tests/engine_test.py b/test/accelerate_tests/base_tests/engine_test.py index 30cb84a55..5b108ce88 100644 --- a/test/accelerate_tests/base_tests/engine_test.py +++ b/test/accelerate_tests/base_tests/engine_test.py @@ -24,7 +24,7 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.outpath) - def check_engine_output(self, output, plotting=False, debug=False): + def check_engine_output(self, output, plotting=False, debug=False, tol=0.1): P_ML, P_ML_serial = output numiter = len(P_ML.runtime["iter_info"]) LL_ML = np.array([P_ML.runtime["iter_info"][i]["error"][1] for i in range(numiter)]) @@ -34,6 +34,11 @@ def check_engine_output(self, output, plotting=False, debug=False): PRB_ML_serial, PRB_ML = P_ML_serial.probe.S["SMFG00"].data[0], P_ML.probe.S["SMFG00"].data[0] eng_ML = P_ML.engines["engine00"] eng_ML_serial = P_ML_serial.engines["engine00"] + # Normalize the outputs + PRB_ML_max = np.abs(PRB_ML).max() + PRB_ML_serial_max = np.abs(PRB_ML_serial).max() + OBJ_ML_serial *= (PRB_ML_serial_max / PRB_ML_max) + PRB_ML_serial /= (PRB_ML_serial_max / PRB_ML_max) if debug: import matplotlib.pyplot as plt plt.figure("ML debug") @@ -67,14 +72,14 @@ def check_engine_output(self, output, plotting=False, debug=False): # np.testing.assert_allclose(eng_ML.debug, eng_ML_serial.debug, atol=1e-7, rtol=1e-7, # err_msg="The debug arrays are not matching as expected") RMSE_ob = (np.mean(np.abs(OBJ_ML_serial - OBJ_ML)**2)) - RMSE_pr = (np.mean(np.abs(PRB_ML_serial - PRB_ML)**2)) - # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-1, + RMSE_pr = (np.mean(np.abs(PRB_ML_serial - PRB_ML)**2)) + #MSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-1, - err_msg="The object arrays are not matching as expected") - # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, - # err_msg="The log-likelihood errors are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") + #p.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, + # err_msg="The log-likelihood errors are not matching as expected") def test_ML_serial_base(self): @@ -152,13 +157,13 @@ def test_ML_serial_smoothing_regularizer(self): scanmodel="BlockFull", autosave=False, verbose_level="critical")) self.check_engine_output(out, plotting=False, debug=False) - @pytest.mark.skip(reason="Funny behaviour with this test, most likely due to numerical instabilities, see issue #612") + @pytest.mark.skip(reason="Funny behaviour with this test, most likely related to Gaussian filter, see issue #607") def test_ML_serial_wavefield_preconditioner(self): out = [] for eng in ["ML", "ML_serial"]: engine_params = u.Param() engine_params.name = eng - engine_params.numiter = 100 + engine_params.numiter = 500 engine_params.floating_intensities = False engine_params.reg_del2 = False engine_params.reg_del2_amplitude = 1. @@ -170,7 +175,7 @@ def test_ML_serial_wavefield_preconditioner(self): engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) - self.check_engine_output(out, plotting=False, debug=False) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_serial_all(self): out = [] diff --git a/test/accelerate_tests/cuda_cupy_tests/engine_tests.py b/test/accelerate_tests/cuda_cupy_tests/engine_test.py similarity index 94% rename from test/accelerate_tests/cuda_cupy_tests/engine_tests.py rename to test/accelerate_tests/cuda_cupy_tests/engine_test.py index 2dc64c34d..67851d2cc 100644 --- a/test/accelerate_tests/cuda_cupy_tests/engine_tests.py +++ b/test/accelerate_tests/cuda_cupy_tests/engine_test.py @@ -23,7 +23,7 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.outpath) - def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): + def check_engine_output(self, output, plotting=False, debug=False, scan="MF", tol=0.1): key = "S%sG00" %scan P_ML_serial, P_ML_cupy = output numiter = len(P_ML_serial.runtime["iter_info"]) @@ -36,6 +36,11 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): MED_ML_cupy = np.median(np.angle(OBJ_ML_cupy)) eng_ML_serial = P_ML_serial.engines["engine00"] eng_ML_cupy = P_ML_cupy.engines["engine00"] + # Normalize the outputs + PRB_ML_serial_max = np.abs(PRB_ML_serial).max() + PRB_ML_cupy_max = np.abs(PRB_ML_cupy).max() + OBJ_ML_cupy *= (PRB_ML_cupy_max / PRB_ML_serial_max) + PRB_ML_cupy /= (PRB_ML_cupy_max / PRB_ML_serial_max) if debug: import matplotlib.pyplot as plt plt.figure("ML serial debug") @@ -71,10 +76,10 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): RMSE_ob = (np.mean(np.abs(OBJ_ML_cupy - OBJ_ML_serial)**2)) RMSE_pr = (np.mean(np.abs(PRB_ML_cupy - PRB_ML_serial)**2)) # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, - err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, err_msg="The object arrays are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") @@ -156,7 +161,7 @@ def test_ML_cupy_wavefield_preconditioner(self): for eng in ["ML_serial", "ML_cupy"]: engine_params = u.Param() engine_params.name = eng - engine_params.numiter = 200 + engine_params.numiter = 500 engine_params.floating_intensities = False engine_params.reg_del2 = False engine_params.reg_del2_amplitude = 1. @@ -168,7 +173,7 @@ def test_ML_cupy_wavefield_preconditioner(self): engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) - self.check_engine_output(out, plotting=False, debug=False) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_cupy_all(self): out = [] diff --git a/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py b/test/accelerate_tests/cuda_pycuda_tests/engine_test.py similarity index 93% rename from test/accelerate_tests/cuda_pycuda_tests/engine_tests.py rename to test/accelerate_tests/cuda_pycuda_tests/engine_test.py index bf1223c2c..39184b2ff 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/engine_tests.py +++ b/test/accelerate_tests/cuda_pycuda_tests/engine_test.py @@ -23,7 +23,7 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.outpath) - def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): + def check_engine_output(self, output, plotting=False, debug=False, scan="MF", tol=0.1): key = "S%sG00" %scan P_ML_serial, P_ML_pycuda = output numiter = len(P_ML_serial.runtime["iter_info"]) @@ -36,6 +36,11 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): MED_ML_pycuda = np.median(np.angle(OBJ_ML_pycuda)) eng_ML_serial = P_ML_serial.engines["engine00"] eng_ML_pycuda = P_ML_pycuda.engines["engine00"] + # Normalize the outputs + PRB_ML_serial_max = np.abs(PRB_ML_serial).max() + PRB_ML_pycuda_max = np.abs(PRB_ML_pycuda).max() + OBJ_ML_pycuda *= (PRB_ML_pycuda_max / PRB_ML_serial_max) + PRB_ML_pycuda /= (PRB_ML_pycuda_max / PRB_ML_serial_max) if debug: import matplotlib.pyplot as plt plt.figure("ML serial debug") @@ -71,10 +76,10 @@ def check_engine_output(self, output, plotting=False, debug=False, scan="MF"): RMSE_ob = (np.mean(np.abs(OBJ_ML_pycuda - OBJ_ML_serial)**2)) RMSE_pr = (np.mean(np.abs(PRB_ML_pycuda - PRB_ML_serial)**2)) # RMSE_LL = (np.mean(np.abs(LL_ML_serial - LL_ML)**2)) - np.testing.assert_allclose(RMSE_ob, 0.0, atol=1e-2, - err_msg="The object arrays are not matching as expected") - np.testing.assert_allclose(RMSE_pr, 0.0, atol=1e-2, + np.testing.assert_allclose(RMSE_ob, 0.0, atol=tol, err_msg="The object arrays are not matching as expected") + np.testing.assert_allclose(RMSE_pr, 0.0, atol=tol, + err_msg="The probe arrays are not matching as expected") # np.testing.assert_allclose(RMSE_LL, 0.0, atol=1e-7, # err_msg="The log-likelihood errors are not matching as expected") @@ -149,14 +154,14 @@ def test_ML_pycuda_smoothing_regularizer(self): engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) - self.check_engine_output(out, plotting=False, debug=False) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_pycuda_wavefield_preconditioner(self): out = [] for eng in ["ML_serial", "ML_pycuda"]: engine_params = u.Param() engine_params.name = eng - engine_params.numiter = 200 + engine_params.numiter = 700 engine_params.floating_intensities = False engine_params.reg_del2 = False engine_params.reg_del2_amplitude = 1. @@ -168,7 +173,7 @@ def test_ML_pycuda_wavefield_preconditioner(self): engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, scanmodel="BlockFull", autosave=False, verbose_level="critical")) - self.check_engine_output(out, plotting=False, debug=False) + self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_pycuda_all(self): out = [] From d7ec1c9c6216cfad58d42a031bd67e8201a28bbd Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Tue, 29 Apr 2025 21:26:05 +0100 Subject: [PATCH 11/12] Add option to provide block size for engine test runner (#614) --- test/accelerate_tests/base_tests/engine_test.py | 12 ++++++------ .../cuda_cupy_tests/engine_test.py | 14 +++++++------- .../cuda_pycuda_tests/engine_test.py | 14 +++++++------- test/utils.py | 4 +++- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/test/accelerate_tests/base_tests/engine_test.py b/test/accelerate_tests/base_tests/engine_test.py index 5b108ce88..3177906fe 100644 --- a/test/accelerate_tests/base_tests/engine_test.py +++ b/test/accelerate_tests/base_tests/engine_test.py @@ -93,7 +93,7 @@ def test_ML_serial_base(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_serial_regularizer(self): @@ -107,7 +107,7 @@ def test_ML_serial_regularizer(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) @@ -123,7 +123,7 @@ def test_ML_serial_preconditioner(self): engine_params.scale_precond = True engine_params.scale_probe_object = 1e-6 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_serial_floating(self): @@ -137,7 +137,7 @@ def test_ML_serial_floating(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) @pytest.mark.skip(reason="Funny behaviour with this test, most likely related to Gaussian filter, see issue #607") @@ -154,7 +154,7 @@ def test_ML_serial_smoothing_regularizer(self): engine_params.smooth_gradient_decay = 1/10. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) @pytest.mark.skip(reason="Funny behaviour with this test, most likely related to Gaussian filter, see issue #607") @@ -174,7 +174,7 @@ def test_ML_serial_wavefield_preconditioner(self): engine_params.wavefield_delta_object = 0.1 engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_serial_all(self): diff --git a/test/accelerate_tests/cuda_cupy_tests/engine_test.py b/test/accelerate_tests/cuda_cupy_tests/engine_test.py index 67851d2cc..2eae5c545 100644 --- a/test/accelerate_tests/cuda_cupy_tests/engine_test.py +++ b/test/accelerate_tests/cuda_cupy_tests/engine_test.py @@ -94,7 +94,7 @@ def test_ML_cupy_base(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_cupy_regularizer(self): @@ -108,7 +108,7 @@ def test_ML_cupy_regularizer(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_cupy_preconditioner(self): @@ -123,7 +123,7 @@ def test_ML_cupy_preconditioner(self): engine_params.scale_precond = True engine_params.scale_probe_object = 1e-6 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_cupy_floating(self): @@ -137,7 +137,7 @@ def test_ML_cupy_floating(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_cupy_smoothing_regularizer(self): @@ -153,7 +153,7 @@ def test_ML_cupy_smoothing_regularizer(self): engine_params.smooth_gradient_decay = 1/10. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_cupy_wavefield_preconditioner(self): @@ -172,7 +172,7 @@ def test_ML_cupy_wavefield_preconditioner(self): engine_params.wavefield_delta_object = 0.1 engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_cupy_all(self): @@ -189,7 +189,7 @@ def test_ML_cupy_all(self): engine_params.scale_precond = True engine_params.scale_probe_object = 1e-6 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="info")) + scanmodel="BlockFull", autosave=False, verbose_level="info", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) if __name__ == "__main__": diff --git a/test/accelerate_tests/cuda_pycuda_tests/engine_test.py b/test/accelerate_tests/cuda_pycuda_tests/engine_test.py index 39184b2ff..b1148b8a6 100644 --- a/test/accelerate_tests/cuda_pycuda_tests/engine_test.py +++ b/test/accelerate_tests/cuda_pycuda_tests/engine_test.py @@ -94,7 +94,7 @@ def test_ML_pycuda_base(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_pycuda_regularizer(self): @@ -108,7 +108,7 @@ def test_ML_pycuda_regularizer(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_pycuda_preconditioner(self): @@ -123,7 +123,7 @@ def test_ML_pycuda_preconditioner(self): engine_params.scale_precond = True engine_params.scale_probe_object = 1e-6 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_pycuda_floating(self): @@ -137,7 +137,7 @@ def test_ML_pycuda_floating(self): engine_params.reg_del2_amplitude = 1. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) def test_ML_pycuda_smoothing_regularizer(self): @@ -153,7 +153,7 @@ def test_ML_pycuda_smoothing_regularizer(self): engine_params.smooth_gradient_decay = 1/10. engine_params.scale_precond = False out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_pycuda_wavefield_preconditioner(self): @@ -172,7 +172,7 @@ def test_ML_pycuda_wavefield_preconditioner(self): engine_params.wavefield_delta_object = 0.1 engine_params.wavefield_delta_probe = 0.1 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="critical")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False, tol=0.3) def test_ML_pycuda_all(self): @@ -189,7 +189,7 @@ def test_ML_pycuda_all(self): engine_params.scale_precond = True engine_params.scale_probe_object = 1e-6 out.append(tu.EngineTestRunner(engine_params, output_path=self.outpath, init_correct_probe=True, - scanmodel="BlockFull", autosave=False, verbose_level="info")) + scanmodel="BlockFull", autosave=False, verbose_level="critical", frames_per_block=100)) self.check_engine_output(out, plotting=False, debug=False) if __name__ == "__main__": diff --git a/test/utils.py b/test/utils.py index 5e184faa6..1a4e9dd3b 100644 --- a/test/utils.py +++ b/test/utils.py @@ -45,9 +45,11 @@ def PtyscanTestRunner(ptyscan_instance, data_params, save_type='append', auto_fr def EngineTestRunner(engine_params,propagator='farfield',output_path='./', output_file=None, - autosave=True, scanmodel="Full", verbose_level="info", init_correct_probe=False): + autosave=True, scanmodel="Full", verbose_level="info", init_correct_probe=False, + frames_per_block=100000): p = u.Param() + p.frames_per_block = frames_per_block p.verbose_level = verbose_level p.io = u.Param() p.io.home = output_path From f0a721626a556f5c1990e026447bc241758837c5 Mon Sep 17 00:00:00 2001 From: "Benedikt J. Daurer" Date: Thu, 8 May 2025 16:49:59 +0100 Subject: [PATCH 12/12] Added 0.9 release notes (#611) --- release_notes.md | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/release_notes.md b/release_notes.md index bed47e241..489885686 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,3 +1,29 @@ +# PtyPy 0.9 release notes + +We're excited to bring you a new release in preparation for the [PtyPy workshop 2025](https://www.synchrotron-soleil.fr/en/events/ptypy-2025) +held at SOLEIL. This release provides a new pre-conditioner for the ML engine and a few bug fixes. + +## ML with wavefield pre-conditioner + +We have added a new pre-conditioner to the ML engines, which seems to significantly improve probe retrieval +and overall convergence. It can be enabled by setting the following parameters + +``` +p.engines.engine.name = "ML" +p.engines.engine.wavefield_precond = True +p.engines.engine.wavefield_delta_object = 0.1 +p.engines.engine.wavefield_delta_probe = 0.1 +``` + +where ```wavefield_delta_object``` and ```wavefield_delta_probe``` are regularisation parameters to avoid +division by zero. + +## Other changes + +We have dropped support for Python <= 3.8 and adopted Numpy >= 2.0. +We have also fixed an issue with the filtered_fft build and made small improvements to the custom WASP engine. + + # PtyPy 0.8 release notes We're excited to bring you a new release, with new engines, CuPy support and @@ -43,9 +69,8 @@ Patch release. # PtyPy 0.7 release notes -This release is focused on improving the usability of PtyPy in Jupyter notebooks in preparation for the -[PtyPy workshop](https://www.diamond.ac.uk/Home/Events/2023/Ptychography--PtyPy--Software-Workshop-2023.html) -held at the Diamond Light Source in January 2023. The workshop features extensive interactive +This release is focused on improving the usability of PtyPy in Jupyter notebooks in preparation for the first +PtyPy workshop held at the Diamond Light Source in January 2023. The workshop features extensive interactive [tutorials](https://ptycho.github.io/tutorials) delivered using Jupyter notebooks. ## Build changes