From 6193256a61fcb4040b490016354e9c7262149143 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 18 Nov 2025 15:08:14 +0100 Subject: [PATCH] added support for fp128 through the quadmath library, run with either floating_type=q or compile with fptype=q to enable it (assuming quadmath is installed on the machine) --- .../aloha/template_files/gpu/helas.h | 18 ++- .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 7 +- .../template_files/gpu/CrossSectionKernels.cc | 36 +++-- .../iolibs/template_files/gpu/check_sa.cc | 15 +- .../iolibs/template_files/gpu/cudacpp.mk | 35 ++--- .../template_files/gpu/cudacpp_config.mk | 12 +- .../iolibs/template_files/gpu/cudacpp_src.mk | 6 +- .../iolibs/template_files/gpu/mgOnGpuConfig.h | 19 ++- .../template_files/gpu/mgOnGpuFptypes.h | 135 +++++++++++++++++- .../CUDACPP_SA_OUTPUT/model_handling.py | 6 +- 10 files changed, 233 insertions(+), 56 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h index fcfc4b3153..c2538082da 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h @@ -5,7 +5,7 @@ ! Copyright (C) 2020-2024 CERN and UCLouvain. ! Licensed under the GNU Lesser General Public License (version 3 or later). ! Modified by: O. Mattelaer (Mar 2020) for the MG5aMC CUDACPP plugin. -! Further modified by: O. Mattelaer, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +! Further modified by: O. Mattelaer, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin. !========================================================================== //-------------------------------------------------------------------------- @@ -194,7 +194,8 @@ if( pp == 0. ) { // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! 
- fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses + // Aliased abs to allow __float128 + fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0. }; // possibility of negative fermion masses //sqm[1] = ( fmass < 0. ? -abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here? sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here fi[2] = cxmake( ip * sqm[ip], 0 ); @@ -220,7 +221,8 @@ #else // Branch A: pp == 0. // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! - fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses (NB: SCALAR!) + // Aliased abs to allow __float128 (irrelevant in SIMD but kept for consistency) + fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0 }; // possibility of negative fermion masses (NB: SCALAR!) sqm[1] = ( fmass < 0 ? -sqm[0] : sqm[0] ); // AV: removed an abs here (as above) const cxtype fiA_2 = ip * sqm[ip]; // scalar cxtype: real part initialised from fptype, imag part = 0 const cxtype fiA_3 = im * nsf * sqm[ip]; // scalar cxtype: real part initialised from fptype, imag part = 0 @@ -436,8 +438,8 @@ vc[1] = cxmake( pvec1 * (fptype)nsv, pvec2 * (fptype)nsv ); if( vmass != 0. ) { - const int nsvahl = nsv * std::abs( hel ); - const fptype hel0 = 1. - std::abs( hel ); + const int nsvahl = nsv * fpabs( hel ); + const fptype hel0 = 1. - fpabs( hel ); #ifndef MGONGPU_CPPSIMD const fptype_sv pt2 = ( pvec1 * pvec1 ) + ( pvec2 * pvec2 ); const fptype_sv pp = fpmin( pvec0, fpsqrt( pt2 + ( pvec3 * pvec3 ) ) ); @@ -604,7 +606,8 @@ if( pp == 0. ) { // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! - fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses + // Aliased abs to allow __float128 + fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0. }; // possibility of negative fermion masses //sqm[1] = ( fmass < 0. ? 
-abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here? sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here const int ip = -( ( 1 - nh ) / 2 ) * nhel; // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++ @@ -637,7 +640,8 @@ const fptype_sv pp = fpmin( pvec0, fpsqrt( p2 ) ); // Branch A: pp == 0. // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! - fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses + // Aliased abs to allow __float128 (irrelevant in SIMD, but for consistency) + fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0 }; // possibility of negative fermion masses sqm[1] = ( fmass < 0 ? -sqm[0] : sqm[0] ); // AV: removed an abs here (as above) const int ipA = -( ( 1 - nh ) / 2 ) * nhel; const int imA = ( 1 + nh ) / 2 * nhel; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index 262d39a736..9a69fa1ad3 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -36,8 +36,13 @@ def compile(self, *args, **opts): {'override FPTYPE': self.run_card['floating_type'] }) misc.sprint('FPTYPE checked') cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py + if self.run_card['floating_type'] == 'q': + if cudacpp_backend not in [ 'fortran', 'cppnone' ]: + misc.sprint("WARNING: cudacpp_backend='%s' does not support quad precision. 
Switching to 'cppnone' backend."%cudacpp_backend) + cudacpp_backend = 'cppnone' logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' @@ -86,7 +91,7 @@ def default_setup(self): super().default_setup() self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f'], + allowed=['m','d','f', 'q'], comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc index bb1e49e3a7..799425f97a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin. 
#include "CrossSectionKernels.h" @@ -18,18 +18,15 @@ // *** NB: Attempts with __attribute__((optimize("-fno-fast-math"))) were unsatisfactory *** // ****************************************************************************************** -inline bool -fp_is_nan( const fptype& fp ) -{ - //#pragma clang diagnostic push - //#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661) - return std::isnan( fp ); // always false for clang in fast math mode (tautological compare)? - //#pragma clang diagnostic pop -} inline bool fp_is_abnormal( const fptype& fp ) { +#ifdef MGONGPUCPP_GPUIMPL +using namespace mg5amcGpu; +#else +using namespace mg5amcCpu; +#endif if( fp_is_nan( fp ) ) return true; if( fp != fp ) return true; return false; @@ -46,6 +43,9 @@ fp_is_zero( const fptype& fp ) inline const char* fp_show_class( const fptype& fp ) { +#ifdef MGONGPU_FPTYPE_QUAD // not bothering to implement the full fpclassify for quad + return "quad"; +#else switch( std::fpclassify( fp ) ) { case FP_INFINITE: return "Inf"; @@ -55,11 +55,17 @@ fp_show_class( const fptype& fp ) case FP_ZERO: return "zero"; default: return "unknown"; } +#endif } inline void debug_me_is_abnormal( const fptype& me, size_t ievtALL ) { +#ifdef MGONGPUCPP_GPUIMPL +using namespace mg5amcGpu; +#else +using namespace mg5amcCpu; +#endif std::cout << "DEBUG[" << ievtALL << "]" << " ME=" << me << " fpisabnormal=" << fp_is_abnormal( me ) @@ -67,12 +73,12 @@ debug_me_is_abnormal( const fptype& me, size_t ievtALL ) << " (me==me)=" << ( me == me ) << " (me==me+1)=" << ( me == me + 1 ) << " isnan=" << fp_is_nan( me ) - << " isfinite=" << std::isfinite( me ) - << " isnormal=" << std::isnormal( me ) + << " isfinite=" << ( !fp_is_infinite( me ) && !fp_is_nan( me ) ) // finite = neither Inf nor NaN (fp_is_infinite alone is the opposite test) + << " isnormal=" << fp_is_normal( me ) << " is0=" << ( me == 0 ) << " is1=" << ( me == 1 ) - << " abs(ME)=" << std::abs( me ) - << " isnan=" << 
fp_is_nan( fpabs( me ) ) << std::endl; } @@ -172,8 +178,8 @@ namespace mg5amcCpu const fptype& me = MemoryAccessMatrixElements::ieventAccessConst( m_matrixElements.data(), ievt ); const fptype& wg = MemoryAccessWeights::ieventAccessConst( m_samplingWeights.data(), ievt ); if( fp_is_abnormal( me ) ) continue; - stats.sqsMEdiff += std::pow( me - stats.refME, 2 ); - stats.sqsWGdiff += std::pow( wg - stats.refWG, 2 ); + stats.sqsMEdiff += (double)fppow( me - stats.refME, 2 ); + stats.sqsWGdiff += (double)fppow( wg - stats.refWG, 2 ); } // FOURTH PASS: UPDATE THE OVERALL STATS BY ADDING THE NEW STATS m_stats += stats; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index aee105f269..b5d0fb41b5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -4,7 +4,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Modified by: O. Mattelaer (Nov 2020) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin. 
//========================================================================== #include "mgOnGpuConfig.h" @@ -704,6 +704,11 @@ main( int argc, char** argv ) { if( verbose ) { +#ifdef MGONGPU_FPTYPE_QUAD +size_t width = 42; +#else +size_t width = 14; +#endif // Display momenta std::cout << "Momenta:" << std::endl; for( int ipar = 0; ipar < CPPProcess::npar; ipar++ ) @@ -711,10 +716,10 @@ main( int argc, char** argv ) // NB: 'setw' affects only the next field (of any type) std::cout << std::scientific // fixed format: affects all floats (default precision: 6) << std::setw( 4 ) << ipar + 1 - << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 0, ipar ) - << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 1, ipar ) - << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 2, ipar ) - << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 3, ipar ) + << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 0, ipar ) + << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 1, ipar ) + << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 2, ipar ) + << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 3, ipar ) << std::endl << std::defaultfloat; // default format: affects all floats } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 22acd3abe9..a2cbc7e6fe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -1,7 +1,7 @@ # Copyright (C) 
2020-2025 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: S. Hageboeck, D. Massaro, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2025) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, D. Massaro, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts @@ -14,6 +14,17 @@ override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- +#=== Include the common MG5aMC Makefile options + +# OM: including make_opts is crucial for MG5aMC flag consistency/documentation +# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp) +# ZW: need to include make_opts prior to cudacpp_config.mk to pass flags for quadmath +ifneq ($(wildcard ../../Source/make_opts),) + include ../../Source/make_opts +endif + +#------------------------------------------------------------------------------- + #=== Include cudacpp_config.mk # Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported (and configure defaults if no user-defined choices exist) @@ -44,16 +55,6 @@ UNAME_P := $(shell uname -p) #------------------------------------------------------------------------------- -#=== Include the common MG5aMC Makefile options - -# OM: including make_opts is crucial for MG5aMC flag consistency/documentation -# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp) -ifneq ($(wildcard ../../Source/make_opts),) - include ../../Source/make_opts -endif - 
-#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -577,8 +578,10 @@ else ifeq ($(FPTYPE),f) else ifeq ($(FPTYPE),m) CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT -else - $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +else ifeq ($(FPTYPE),q) + CXXFLAGS += -DMGONGPU_FPTYPE_QUAD +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f', 'm' and 'q' are supported) endif # Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") @@ -811,7 +814,7 @@ endif # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) $(BUILDDIR)/%%_cpp.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ + $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ $(QUADFLAG) # Generic target and build rules: objects from CUDA or HIP compilation ifneq ($(GPUCC),) @@ -847,7 +850,7 @@ endif $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(QUADFLAG) ifneq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o @@ -875,7 +878,7 @@ endif ###$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSASAN) $(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_checkmain): 
$(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o - $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) $(QUADFLAG) ifneq ($(GPUCC),) ###$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSASAN) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk index b57e56d182..30bf67cdfc 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk @@ -1,13 +1,21 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: A. Valassi (Mar 2024) for the MG5aMC CUDACPP plugin. -# Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. +# Further modified by: A. Valassi, Z. Wettersten (2024-2025) for the MG5aMC CUDACPP plugin. 
#------------------------------------------------------------------------------- #=== Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported #=== Configure default values for these variables if no user-defined choices exist +# Include libquadmath if FPTYPE=quad and set backend to cppnone +ifeq ($(FPTYPE),q) + override QUADFLAG = -lquadmath + override BACKEND = cppnone +else + override QUADFLAG = +endif + # Set the default BACKEND (CUDA, HIP or C++/SIMD) choice ifeq ($(BACKEND),) override BACKEND = cppauto @@ -39,7 +47,7 @@ ifneq ($(words $(filter $(BACKEND), $(SUPPORTED_BACKENDS))),1) $(error Invalid backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) endif -override SUPPORTED_FPTYPES = d f m +override SUPPORTED_FPTYPES = d f m q ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1) $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)')) endif diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index 897de8caa8..485bd3bb7b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: assume that the same name (e.g. cudacpp.mk, Makefile...) is used in the Subprocess and src directories @@ -132,7 +132,7 @@ $(LIBDIR)/.build.$(TAG): # Generic target and build rules: objects from C++ compilation $(BUILDDIR)/%%_cpp.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ + $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ $(QUADFLAG) # Generic target and build rules: objects from CUDA compilation ifneq ($(GPUCC),) @@ -154,7 +154,7 @@ endif ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) $(QUADFLAG) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h index c32d0a2740..af441fce19 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h @@ -1,11 +1,16 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Jul 2020) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. 
Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin. #ifndef MGONGPUCONFIG_H #define MGONGPUCONFIG_H 1 +// When using quad, make sure to include quadmath.h everywhere +#ifdef MGONGPU_FPTYPE_QUAD +#include <quadmath.h> +#endif + // HARDCODED AT CODE GENERATION TIME: DO NOT MODIFY (#473) // There are two different code bases for standalone_cudacpp (without multichannel) and madevent+cudacpp (with multichannel) %(mgongpu_supports_multichannel)s @@ -61,7 +66,7 @@ // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) -#if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT +#if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT and not defined MGONGPU_FPTYPE_QUAD // Floating point precision (CHOOSE ONLY ONE) #define MGONGPU_FPTYPE_DOUBLE 1 // default //#define MGONGPU_FPTYPE_FLOAT 1 // 2x faster @@ -69,7 +74,8 @@ // Choose floating point precision (for color algebra alone #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE2_FLOAT, nothing happens (issue #167) -#if not defined MGONGPU_FPTYPE2_DOUBLE and not defined MGONGPU_FPTYPE2_FLOAT +// ZW: including singular flag for quad precision (quad intended for tests, not production) +#if not defined MGONGPU_FPTYPE2_DOUBLE and not defined MGONGPU_FPTYPE2_FLOAT and not defined MGONGPU_FPTYPE_QUAD // Floating point precision (CHOOSE ONLY ONE) #define MGONGPU_FPTYPE2_DOUBLE 1 // default //#define MGONGPU_FPTYPE2_FLOAT 1 // 2x faster @@ -177,6 +183,10 @@ namespace mgOnGpu // --- Type definitions // Floating point type (for everything but color algebra #537): fptype +#ifdef MGONGPU_FPTYPE_QUAD + typedef __float128 fptype; // quad precision (16 bytes, fp128) + typedef __float128 fptype2; // quad precision (16 bytes, fp128) +#else #if defined MGONGPU_FPTYPE_DOUBLE typedef double fptype; // double precision (8 bytes, fp64) #elif defined MGONGPU_FPTYPE_FLOAT @@ -189,6 +199,7 @@ namespace mgOnGpu #elif defined MGONGPU_FPTYPE2_FLOAT typedef float fptype2; // single precision (4 bytes, fp32) #endif +#endif // #ifdef MGONGPU_FPTYPE_QUAD // --- Platform-specific software implementation details @@ -217,6 +228,8 @@ using mgOnGpu::fptype2; // C++ SIMD vectorization width (this will be used to set neppV) #ifdef MGONGPUCPP_GPUIMPL // CUDA and HIP implementations have no SIMD #undef MGONGPU_CPPSIMD +#elif defined MGONGPU_FPTYPE_QUAD // quad precision: no SIMD +#undef MGONGPU_CPPSIMD #elif defined __AVX512VL__ && defined MGONGPU_PVW512 // C++ "512z" AVX512 with 512 width (512-bit ie 64-byte): 8 (DOUBLE) or 16 (FLOAT) #ifdef MGONGPU_FPTYPE_DOUBLE #define MGONGPU_CPPSIMD 8 diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h index 960beeeeae..399ccef58d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin. #ifndef MGONGPUFPTYPES_H #define MGONGPUFPTYPES_H 1 @@ -10,6 +10,10 @@ #include <algorithm> #include <cmath> +#ifdef MGONGPU_FPTYPE_QUAD +#include <quadmath.h> +#include <iostream> // for operator<< overloading +#endif // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725) #ifdef MGONGPUCPP_GPUIMPL // cuda @@ -70,6 +74,101 @@ namespace mg5amcCpu #ifndef MGONGPUCPP_GPUIMPL +#ifdef MGONGPU_FPTYPE_QUAD + + //------------------------------ + // Quad precision types - C++ + //------------------------------ + + // quad max returns by value, not by reference + inline fptype + fpmax( const fptype& a, const fptype& b ) + { + return fmaxq( a, b ); + } + + // quad min returns by value, not by reference + inline fptype + fpmin( const fptype& a, const fptype& b ) + { + return fminq( a, b ); + } + + inline fptype + fpsqrt( const fptype& f ) + { + return sqrtq( f ); + } + + inline fptype + fpabs( const fptype& f ) + { + return fabsq( f ); + } + + inline fptype + fppow( const fptype& base, const int& exp ) + { + // Special case for positive integer exponents + if ( exp >= 0 ) + { + fptype result = static_cast<__float128>( 1.0 ); + for ( int i = 0; i < exp; ++i ) + { + result *= base; + } + return result; + } + // General case + return powq( base, static_cast<__float128>( exp ) ); + } + + inline fptype + fppow( const fptype& base, const fptype& exp ) + { + return powq( base, exp ); + } + + // Overload operator<< for quad precision (need to convert to 
string first) + inline std::ostream& + operator<<( std::ostream& os, const fptype& f ) + { + char buffer[128]; + // Convert __float128 to string with 36 digits of precision + int n = quadmath_snprintf( buffer, sizeof( buffer ), "%.36Qg", f ); + if ( n > 0 && n < static_cast<int>( sizeof( buffer ) ) ) + { + os << buffer; + } + else + { + os << "ConversionError"; + } + return os; + } + + inline bool + fp_is_infinite( const fptype& fp ) + { + return isinfq( fp ); + } + + inline bool + fp_is_nan( const fptype& fp ) + { + return isnanq( fp ); + } + +#define UNUSED(x) (void)(x) + inline bool + fp_is_normal( const fptype& fp ) + { + UNUSED(fp); + return true; // no isnormalq in quadmath.h + } + +#else + //------------------------------ // Floating point types - C++ //------------------------------ @@ -92,6 +191,40 @@ namespace mg5amcCpu return std::sqrt( f ); } + inline fptype + fpabs( const fptype& f ) + { + return std::abs( f ); + } + + inline fptype + fppow( const fptype& base, const int& exp ) + { + return std::pow( base, static_cast<fptype>( exp ) ); + } + + inline bool + fp_is_infinite( const fptype& fp ) + { + return std::isinf( fp ); + } + + inline bool + fp_is_nan( const fptype& fp ) + { + //#pragma clang diagnostic push + //#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661) + return std::isnan( fp ); // always false for clang in fast math mode (tautological compare)? 
+ //#pragma clang diagnostic pop + } + + inline bool + fp_is_normal( const fptype& fp ) + { + return std::isnormal( fp ); + } + +#endif // #ifdef MGONGPU_FPTYPE_QUAD #endif // #ifndef MGONGPUCPP_GPUIMPL //========================================================================== diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 3f8a85afa6..3ab422e693 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -201,7 +201,7 @@ def get_header_txt(self, name=None, couplings=None,mode=''): list_arg = '[]' # AV from cxtype_sv to fptype array (running alphas #373) point = self.type2def['pointer_coup'] args.append('%s %s%s%s'% (type, point, argname, list_arg)) - args.append('double Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628 + args.append('fptype Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628 | ZW: changed from 'double' to 'fptype' for quad else: args.append('%s %s%s'% (type, argname, list_arg)) if not self.offshell: @@ -805,7 +805,7 @@ def super_write_set_parameters_donotfixMajorana(self, params): for param in params: res_strings.append( "%s" % param.expr ) res = "\n".join(res_strings) - res = res.replace('ABS(','std::abs(') # for SMEFT #614 and #616 + res = res.replace('ABS(','fpabs(') # for SMEFT #614 and #616 return res # AV - replace export_cpp.UFOModelConverterCPP method (eventually split writing of parameters and fixes for Majorana particles #622) @@ -823,7 +823,7 @@ def super_write_set_parameters_onlyfixMajorana(self, hardcoded): # FIXME! 
split res_strings.append( prefix+" constexpr double %(W)s = %(W)s_sign * %(W)s_abs;" % { 'W' : particle.get('width') } ) else: res_strings.append( prefix+" if( %s < 0 )" % particle.get('mass')) - res_strings.append( prefix+" %(width)s = -std::abs( %(width)s );" % {"width": particle.get('width')}) + res_strings.append( prefix+" %(width)s = -fpabs( %(width)s );" % {"width": particle.get('width')}) if len( res_strings ) != 0 : res_strings = [ prefix + " // Fixes for Majorana particles" ] + res_strings if not hardcoded: return '\n' + '\n'.join(res_strings) if res_strings else '' else: return '\n' + '\n'.join(res_strings) + '\n' if res_strings else '\n'