From 6193256a61fcb4040b490016354e9c7262149143 Mon Sep 17 00:00:00 2001
From: Zenny Wettersten <zenny.wettersten@cern.ch>
Date: Tue, 18 Nov 2025 15:08:14 +0100
Subject: [PATCH] added support for fp128 through the quadmath library, run
 with either floating_type=q or compile with FPTYPE=q to enable it (assuming
 quadmath is installed on the machine)

---
 .../aloha/template_files/gpu/helas.h          |  18 ++-
 .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py |   7 +-
 .../template_files/gpu/CrossSectionKernels.cc |  36 +++--
 .../iolibs/template_files/gpu/check_sa.cc     |  15 +-
 .../iolibs/template_files/gpu/cudacpp.mk      |  35 ++---
 .../template_files/gpu/cudacpp_config.mk      |  12 +-
 .../iolibs/template_files/gpu/cudacpp_src.mk  |   6 +-
 .../iolibs/template_files/gpu/mgOnGpuConfig.h |  19 ++-
 .../template_files/gpu/mgOnGpuFptypes.h       | 135 +++++++++++++++++-
 .../CUDACPP_SA_OUTPUT/model_handling.py       |   6 +-
 10 files changed, 233 insertions(+), 56 deletions(-)

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h
index fcfc4b3153..c2538082da 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h
@@ -5,7 +5,7 @@
 ! Copyright (C) 2020-2024 CERN and UCLouvain.
 ! Licensed under the GNU Lesser General Public License (version 3 or later).
 ! Modified by: O. Mattelaer (Mar 2020) for the MG5aMC CUDACPP plugin.
-! Further modified by: O. Mattelaer, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
+! Further modified by: O. Mattelaer, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
 !==========================================================================
   //--------------------------------------------------------------------------
 
@@ -194,7 +194,8 @@
       if( pp == 0. )
       {
         // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs!
-        fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses
+        // Aliased abs to allow __float128
+        fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0. }; // possibility of negative fermion masses
         //sqm[1] = ( fmass < 0. ? -abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here?
         sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here
         fi[2] = cxmake( ip * sqm[ip], 0 );
@@ -220,7 +221,8 @@
 #else
       // Branch A: pp == 0.
       // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs!
-      fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses (NB: SCALAR!)
+      // Aliased abs to allow __float128 (irrelevant in SIMD but kept for consistency)
+      fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0 }; // possibility of negative fermion masses (NB: SCALAR!)
       sqm[1] = ( fmass < 0 ? -sqm[0] : sqm[0] );          // AV: removed an abs here (as above)
       const cxtype fiA_2 = ip * sqm[ip];                  // scalar cxtype: real part initialised from fptype, imag part = 0
       const cxtype fiA_3 = im * nsf * sqm[ip];            // scalar cxtype: real part initialised from fptype, imag part = 0
@@ -436,8 +438,8 @@
     vc[1] = cxmake( pvec1 * (fptype)nsv, pvec2 * (fptype)nsv );
     if( vmass != 0. )
     {
-      const int nsvahl = nsv * std::abs( hel );
-      const fptype hel0 = 1. - std::abs( hel );
+      const int nsvahl = nsv * fpabs( hel );
+      const fptype hel0 = 1. - fpabs( hel );
 #ifndef MGONGPU_CPPSIMD
       const fptype_sv pt2 = ( pvec1 * pvec1 ) + ( pvec2 * pvec2 );
       const fptype_sv pp = fpmin( pvec0, fpsqrt( pt2 + ( pvec3 * pvec3 ) ) );
@@ -604,7 +606,8 @@
       if( pp == 0. )
       {
         // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs!
-        fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses
+        // Aliased abs to allow __float128
+        fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0. }; // possibility of negative fermion masses
         //sqm[1] = ( fmass < 0. ? -abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here?
         sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here
         const int ip = -( ( 1 - nh ) / 2 ) * nhel;  // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++
@@ -637,7 +640,8 @@
       const fptype_sv pp = fpmin( pvec0, fpsqrt( p2 ) );
       // Branch A: pp == 0.
       // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs!
-      fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses
+      // Aliased abs to allow __float128 (irrelevant in SIMD, but for consistency)
+      fptype sqm[2] = { fpsqrt( fpabs( fmass ) ), 0 }; // possibility of negative fermion masses
       sqm[1] = ( fmass < 0 ? -sqm[0] : sqm[0] );          // AV: removed an abs here (as above)
       const int ipA = -( ( 1 - nh ) / 2 ) * nhel;
       const int imA = ( 1 + nh ) / 2 * nhel;
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
index 262d39a736..9a69fa1ad3 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
@@ -36,8 +36,13 @@ def compile(self, *args, **opts):
                 {'override FPTYPE': self.run_card['floating_type'] })
             misc.sprint('FPTYPE checked')
         cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ]
+            
         if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):            
             cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py
+            if self.run_card['floating_type'] == 'q':
+                if cudacpp_backend not in [ 'fortran', 'cppnone' ]: # quad (libquadmath) builds have no SIMD/GPU support
+                    logger.warning("cudacpp_backend='%s' does not support quad precision. Switching to 'cppnone' backend.", cudacpp_backend)
+                    cudacpp_backend = 'cppnone'
             logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend)
             if cudacpp_backend in cudacpp_supported_backends :
                 args[0][0] = 'madevent_' + cudacpp_backend + '_link'
@@ -86,7 +91,7 @@ def default_setup(self):
         super().default_setup()
         self.add_param('floating_type', 'm', include=False, hidden=True,
                        fct_mod=(self.reset_makeopts,(),{}),
-                       allowed=['m','d','f'],
+                       allowed=['m','d','f', 'q'],
                        comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)'
                        )
         cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ]
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc
index bb1e49e3a7..799425f97a 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CrossSectionKernels.cc
@@ -1,7 +1,7 @@
 // Copyright (C) 2020-2024 CERN and UCLouvain.
 // Licensed under the GNU Lesser General Public License (version 3 or later).
 // Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
-// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin.
 
 #include "CrossSectionKernels.h"
 
@@ -18,18 +18,15 @@
 // *** NB: Attempts with __attribute__((optimize("-fno-fast-math"))) were unsatisfactory  ***
 // ******************************************************************************************
 
-inline bool
-fp_is_nan( const fptype& fp )
-{
-  //#pragma clang diagnostic push
-  //#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661)
-  return std::isnan( fp ); // always false for clang in fast math mode (tautological compare)?
-  //#pragma clang diagnostic pop
-}
 
 inline bool
 fp_is_abnormal( const fptype& fp )
 {
+#ifdef MGONGPUCPP_GPUIMPL
+using namespace mg5amcGpu;
+#else
+using namespace mg5amcCpu;
+#endif
   if( fp_is_nan( fp ) ) return true;
   if( fp != fp ) return true;
   return false;
@@ -46,6 +43,9 @@ fp_is_zero( const fptype& fp )
 inline const char*
 fp_show_class( const fptype& fp )
 {
+#ifdef MGONGPU_FPTYPE_QUAD // not bothering to implement the full fpclassify for quad
+  return "quad";
+#else
   switch( std::fpclassify( fp ) )
   {
     case FP_INFINITE: return "Inf";
@@ -55,11 +55,17 @@ fp_show_class( const fptype& fp )
     case FP_ZERO: return "zero";
     default: return "unknown";
   }
+#endif
 }
 
 inline void
 debug_me_is_abnormal( const fptype& me, size_t ievtALL )
 {
+#ifdef MGONGPUCPP_GPUIMPL
+using namespace mg5amcGpu;
+#else
+using namespace mg5amcCpu;
+#endif
   std::cout << "DEBUG[" << ievtALL << "]"
             << " ME=" << me
             << " fpisabnormal=" << fp_is_abnormal( me )
@@ -67,12 +73,12 @@ debug_me_is_abnormal( const fptype& me, size_t ievtALL )
             << " (me==me)=" << ( me == me )
             << " (me==me+1)=" << ( me == me + 1 )
             << " isnan=" << fp_is_nan( me )
-            << " isfinite=" << std::isfinite( me )
-            << " isnormal=" << std::isnormal( me )
+            << " isfinite=" << ( !fp_is_infinite( me ) && !fp_is_nan( me ) ) // finite means neither Inf nor NaN
+            << " isnormal=" << fp_is_normal( me )
             << " is0=" << ( me == 0 )
             << " is1=" << ( me == 1 )
-            << " abs(ME)=" << std::abs( me )
-            << " isnan=" << fp_is_nan( std::abs( me ) )
+            << " abs(ME)=" << fpabs( me )
+            << " isnan=" << fp_is_nan( fpabs( me ) )
             << std::endl;
 }
 
@@ -172,8 +178,8 @@ namespace mg5amcCpu
       const fptype& me = MemoryAccessMatrixElements::ieventAccessConst( m_matrixElements.data(), ievt );
       const fptype& wg = MemoryAccessWeights::ieventAccessConst( m_samplingWeights.data(), ievt );
       if( fp_is_abnormal( me ) ) continue;
-      stats.sqsMEdiff += std::pow( me - stats.refME, 2 );
-      stats.sqsWGdiff += std::pow( wg - stats.refWG, 2 );
+      stats.sqsMEdiff += (double)fppow( me - stats.refME, 2 );
+      stats.sqsWGdiff += (double)fppow( wg - stats.refWG, 2 );
     }
     // FOURTH PASS: UPDATE THE OVERALL STATS BY ADDING THE NEW STATS
     m_stats += stats;
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
index aee105f269..b5d0fb41b5 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc
@@ -4,7 +4,7 @@
 // Copyright (C) 2020-2024 CERN and UCLouvain.
 // Licensed under the GNU Lesser General Public License (version 3 or later).
 // Modified by: O. Mattelaer (Nov 2020) for the MG5aMC CUDACPP plugin.
-// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
 //==========================================================================
 
 #include "mgOnGpuConfig.h"
@@ -704,6 +704,11 @@ main( int argc, char** argv )
     {
       if( verbose )
       {
+#ifdef MGONGPU_FPTYPE_QUAD
+        constexpr int width = 46; // "%.36Qg" output can reach 42+ characters: keep a separator between columns
+#else
+        constexpr int width = 14; // default column width for float/double momenta
+#endif
         // Display momenta
         std::cout << "Momenta:" << std::endl;
         for( int ipar = 0; ipar < CPPProcess::npar; ipar++ )
@@ -711,10 +716,10 @@ main( int argc, char** argv )
           // NB: 'setw' affects only the next field (of any type)
           std::cout << std::scientific // fixed format: affects all floats (default precision: 6)
                     << std::setw( 4 ) << ipar + 1
-                    << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 0, ipar )
-                    << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 1, ipar )
-                    << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 2, ipar )
-                    << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 3, ipar )
+                    << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 0, ipar )
+                    << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 1, ipar )
+                    << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 2, ipar )
+                    << std::setw( width ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 3, ipar )
                     << std::endl
                     << std::defaultfloat; // default format: affects all floats
         }
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
index 22acd3abe9..a2cbc7e6fe 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
@@ -1,7 +1,7 @@
 # Copyright (C) 2020-2025 CERN and UCLouvain.
 # Licensed under the GNU Lesser General Public License (version 3 or later).
 # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin.
-# Further modified by: S. Hageboeck, D. Massaro, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2025) for the MG5aMC CUDACPP plugin.
+# Further modified by: S. Hageboeck, D. Massaro, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
 
 #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html)
 #=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts
@@ -14,6 +14,17 @@ override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk
 
 #-------------------------------------------------------------------------------
 
+#=== Include the common MG5aMC Makefile options
+
+# OM: including make_opts is crucial for MG5aMC flag consistency/documentation
+# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp)
+# ZW: need to include make_opts prior to cudacpp_config.mk to pass flags for quadmath
+ifneq ($(wildcard ../../Source/make_opts),)
+  include ../../Source/make_opts
+endif
+
+#-------------------------------------------------------------------------------
+
 #=== Include cudacpp_config.mk
 
 # Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported (and configure defaults if no user-defined choices exist)
@@ -44,16 +55,6 @@ UNAME_P := $(shell uname -p)
 
 #-------------------------------------------------------------------------------
 
-#=== Include the common MG5aMC Makefile options
-
-# OM: including make_opts is crucial for MG5aMC flag consistency/documentation
-# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp)
-ifneq ($(wildcard ../../Source/make_opts),)
-  include ../../Source/make_opts
-endif
-
-#-------------------------------------------------------------------------------
-
 #=== Redefine BACKEND if the current value is 'cppauto'
 
 # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?)
@@ -577,8 +578,10 @@ else ifeq ($(FPTYPE),f)
 else ifeq ($(FPTYPE),m)
   CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT
   GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT
-else
-  $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported)
+else ifeq ($(FPTYPE),q)
+  CXXFLAGS += -DMGONGPU_FPTYPE_QUAD
+else
+  $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f', 'm' and 'q' are supported)
 endif
 
 # Set the build flags appropriate to each HELINL choice (example: "make HELINL=1")
@@ -811,7 +814,7 @@ endif
 # (NB do not include CUDA_INC here! add it only for NVTX or curand #679)
 $(BUILDDIR)/%%_cpp.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG)
 	@if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
-	$(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@
+	$(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ $(QUADFLAG)
 
 # Generic target and build rules: objects from CUDA or HIP compilation
 ifneq ($(GPUCC),)
@@ -847,7 +850,7 @@ endif
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib)
-	$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(QUADFLAG)
 
 ifneq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
@@ -875,7 +878,7 @@ endif
 ###$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
 $(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o
-	$(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS)
+	$(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) $(QUADFLAG)
 
 ifneq ($(GPUCC),)
 ###$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSASAN)
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
index b57e56d182..30bf67cdfc 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk
@@ -1,13 +1,21 @@
 # Copyright (C) 2020-2024 CERN and UCLouvain.
 # Licensed under the GNU Lesser General Public License (version 3 or later).
 # Created by: A. Valassi (Mar 2024) for the MG5aMC CUDACPP plugin.
-# Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.
+# Further modified by: A. Valassi, Z. Wettersten (2024-2025) for the MG5aMC CUDACPP plugin.
 
 #-------------------------------------------------------------------------------
 
 #=== Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported
 #=== Configure default values for these variables if no user-defined choices exist
 
+# Include libquadmath if FPTYPE=quad and set backend to cppnone
+ifeq ($(FPTYPE),q)
+  override QUADFLAG = -lquadmath
+  override BACKEND = cppnone
+else
+  override QUADFLAG =
+endif
+
 # Set the default BACKEND (CUDA, HIP or C++/SIMD) choice
 ifeq ($(BACKEND),)
   override BACKEND = cppauto
@@ -39,7 +47,7 @@ ifneq ($(words $(filter $(BACKEND), $(SUPPORTED_BACKENDS))),1)
   $(error Invalid backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)'))
 endif
 
-override SUPPORTED_FPTYPES = d f m
+override SUPPORTED_FPTYPES = d f m q
 ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
   $(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
 endif
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk
index 897de8caa8..485bd3bb7b 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk
@@ -1,7 +1,7 @@
 # Copyright (C) 2020-2024 CERN and UCLouvain.
 # Licensed under the GNU Lesser General Public License (version 3 or later).
 # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin.
-# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
+# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
 
 #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html)
 #=== NB: assume that the same name (e.g. cudacpp.mk, Makefile...) is used in the Subprocess and src directories
@@ -132,7 +132,7 @@ $(LIBDIR)/.build.$(TAG):
 # Generic target and build rules: objects from C++ compilation
 $(BUILDDIR)/%%_cpp.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG)
 	@if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
-	$(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@
+	$(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ $(QUADFLAG)
 
 # Generic target and build rules: objects from CUDA compilation
 ifneq ($(GPUCC),)
@@ -154,7 +154,7 @@ endif
 ifeq ($(GPUCC),)
 $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects)
 	@if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi
-	$(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS)
+	$(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) $(QUADFLAG)
 else
 $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects)
 	@if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
index c32d0a2740..af441fce19 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h
@@ -1,11 +1,16 @@
 // Copyright (C) 2020-2024 CERN and UCLouvain.
 // Licensed under the GNU Lesser General Public License (version 3 or later).
 // Created by: A. Valassi (Jul 2020) for the MG5aMC CUDACPP plugin.
-// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
 
 #ifndef MGONGPUCONFIG_H
 #define MGONGPUCONFIG_H 1
 
+// When using quad, make sure to include quadmath.h everywhere
+#ifdef MGONGPU_FPTYPE_QUAD
+#include <quadmath.h>
+#endif
+
 // HARDCODED AT CODE GENERATION TIME: DO NOT MODIFY (#473)
 // There are two different code bases for standalone_cudacpp (without multichannel) and madevent+cudacpp (with multichannel)
 %(mgongpu_supports_multichannel)s
@@ -61,7 +66,7 @@
 
 // Choose floating point precision (for everything but color algebra #537)
 // If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167)
-#if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT
+#if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT and not defined MGONGPU_FPTYPE_QUAD
 // Floating point precision (CHOOSE ONLY ONE)
 #define MGONGPU_FPTYPE_DOUBLE 1 // default
 //#define MGONGPU_FPTYPE_FLOAT 1 // 2x faster
@@ -69,7 +74,8 @@
 
 // Choose floating point precision (for color algebra alone #537)
 // If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE2_FLOAT, nothing happens (issue #167)
-#if not defined MGONGPU_FPTYPE2_DOUBLE and not defined MGONGPU_FPTYPE2_FLOAT
+// ZW: including singular flag for quad precision (quad intended for tests, not production)
+#if not defined MGONGPU_FPTYPE2_DOUBLE and not defined MGONGPU_FPTYPE2_FLOAT and not defined MGONGPU_FPTYPE_QUAD
 // Floating point precision (CHOOSE ONLY ONE)
 #define MGONGPU_FPTYPE2_DOUBLE 1 // default
 //#define MGONGPU_FPTYPE2_FLOAT 1 // 2x faster
@@ -177,6 +183,10 @@ namespace mgOnGpu
   // --- Type definitions
 
   // Floating point type (for everything but color algebra #537): fptype
+#ifdef MGONGPU_FPTYPE_QUAD
+  typedef __float128 fptype; // quad precision (16 bytes, fp128)
+  typedef __float128 fptype2; // quad precision (16 bytes, fp128)
+#else
 #if defined MGONGPU_FPTYPE_DOUBLE
   typedef double fptype; // double precision (8 bytes, fp64)
 #elif defined MGONGPU_FPTYPE_FLOAT
@@ -189,6 +199,7 @@ namespace mgOnGpu
 #elif defined MGONGPU_FPTYPE2_FLOAT
   typedef float fptype2; // single precision (4 bytes, fp32)
 #endif
+#endif // #ifdef MGONGPU_FPTYPE_QUAD
 
   // --- Platform-specific software implementation details
 
@@ -217,6 +228,8 @@ using mgOnGpu::fptype2;
 // C++ SIMD vectorization width (this will be used to set neppV)
 #ifdef MGONGPUCPP_GPUIMPL // CUDA and HIP implementations have no SIMD
 #undef MGONGPU_CPPSIMD
+#elif defined MGONGPU_FPTYPE_QUAD // quad precision: no SIMD
+#undef MGONGPU_CPPSIMD
 #elif defined __AVX512VL__ && defined MGONGPU_PVW512 // C++ "512z" AVX512 with 512 width (512-bit ie 64-byte): 8 (DOUBLE) or 16 (FLOAT)
 #ifdef MGONGPU_FPTYPE_DOUBLE
 #define MGONGPU_CPPSIMD 8
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h
index 960beeeeae..399ccef58d 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuFptypes.h
@@ -1,7 +1,7 @@
 // Copyright (C) 2020-2024 CERN and UCLouvain.
 // Licensed under the GNU Lesser General Public License (version 3 or later).
 // Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
-// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin.
+// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin.
 
 #ifndef MGONGPUFPTYPES_H
 #define MGONGPUFPTYPES_H 1
@@ -10,6 +10,10 @@
 
 #include <algorithm>
 #include <cmath>
+#ifdef MGONGPU_FPTYPE_QUAD
+#include <quadmath.h>
+#include <ostream> // for operator<< overloading
+#endif
 
 // NB: namespaces mg5amcGpu and mg5amcCpu includes types which are defined in different ways for CPU and GPU builds (see #318 and #725)
 #ifdef MGONGPUCPP_GPUIMPL // cuda
@@ -70,6 +74,101 @@ namespace mg5amcCpu
 
 #ifndef MGONGPUCPP_GPUIMPL
 
+#ifdef MGONGPU_FPTYPE_QUAD
+
+  //------------------------------
+  // Quad precision types - C++
+  //------------------------------
+
+  // quad max returns by value, not by reference
+  inline fptype
+  fpmax( const fptype& a, const fptype& b )
+  {
+    return fmaxq( a, b );
+  }
+
+  // quad min returns by value, not by reference
+  inline fptype
+  fpmin( const fptype& a, const fptype& b )
+  {
+    return fminq( a, b );
+  }
+
+  inline fptype
+  fpsqrt( const fptype& f )
+  {
+    return sqrtq( f );
+  }
+
+  inline fptype
+  fpabs( const fptype& f )
+  {
+    return fabsq( f );
+  }
+
+  inline fptype
+  fppow( const fptype& base, const int& exp )
+  {
+    // Special case for positive integer exponents
+    if ( exp >= 0 )
+    {
+      fptype result = static_cast<__float128>( 1.0 );
+      for ( int i = 0; i < exp; ++i )
+      {
+        result *= base;
+      }
+      return result;
+    }
+    // General case
+    return powq( base, static_cast<__float128>( exp ) );
+  }
+
+  inline fptype
+  fppow( const fptype& base, const fptype& exp )
+  {
+    return powq( base, exp );
+  }
+
+  // Overload operator<< for quad precision (need to convert to string first)
+  inline std::ostream&
+  operator<<( std::ostream& os, const fptype& f )
+  {
+    char buffer[128];
+    // Convert __float128 to string with 36 digits of precision
+    int n = quadmath_snprintf( buffer, sizeof( buffer ), "%.36Qg", f );
+    if ( n > 0 && n < static_cast<int>( sizeof( buffer ) ) )
+    {
+      os << buffer;
+    }
+    else
+    {
+      os << "ConversionError";
+    }
+    return os;
+  }
+
+  inline bool
+  fp_is_infinite( const fptype& fp )
+  {
+    return isinfq( fp );
+  }
+
+  inline bool
+  fp_is_nan( const fptype& fp )
+  {
+    return isnanq( fp );
+  }
+
+  // libquadmath has no isnormalq: a value is normal if finite, nonzero and not subnormal
+  inline bool
+  fp_is_normal( const fptype& fp )
+  {
+    // ilogbq is FP_ILOGB0 (hugely negative) for zero and below FLT128_MIN_EXP-1 for subnormals
+    return finiteq( fp ) && ilogbq( fp ) >= FLT128_MIN_EXP - 1;
+  }
+
+#else
+
   //------------------------------
   // Floating point types - C++
   //------------------------------
@@ -92,6 +191,40 @@ namespace mg5amcCpu
     return std::sqrt( f );
   }
 
+  inline fptype
+  fpabs( const fptype& f )
+  {
+    return std::abs( f );
+  }
+
+  inline fptype
+  fppow( const fptype& base, const int& exp )
+  {
+    return std::pow( base, static_cast<fptype>( exp ) );
+  }
+
+  inline bool
+  fp_is_infinite( const fptype& fp )
+  {
+    return std::isinf( fp );
+  }
+
+  inline bool
+  fp_is_nan( const fptype& fp )
+  {
+    //#pragma clang diagnostic push
+    //#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661)
+    return std::isnan( fp ); // always false for clang in fast math mode (tautological compare)?
+    //#pragma clang diagnostic pop
+  }
+
+  inline bool
+  fp_is_normal( const fptype& fp )
+  {
+    return std::isnormal( fp );
+  }
+
+#endif // #ifdef MGONGPU_FPTYPE_QUAD
 #endif // #ifndef MGONGPUCPP_GPUIMPL
 
   //==========================================================================
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
index 3f8a85afa6..3ab422e693 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
@@ -201,7 +201,7 @@ def get_header_txt(self, name=None, couplings=None,mode=''):
                 list_arg = '[]' # AV from cxtype_sv to fptype array (running alphas #373)
                 point = self.type2def['pointer_coup']
                 args.append('%s %s%s%s'% (type, point, argname, list_arg))
-                args.append('double Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628
+                args.append('fptype Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628 | ZW: changed from 'double' to 'fptype' for quad
             else:
                 args.append('%s %s%s'% (type, argname, list_arg))
         if not self.offshell:
@@ -805,7 +805,7 @@ def super_write_set_parameters_donotfixMajorana(self, params):
         for param in params:
             res_strings.append( "%s" % param.expr )
         res = "\n".join(res_strings)
-        res = res.replace('ABS(','std::abs(') # for SMEFT #614 and #616
+        res = res.replace('ABS(','fpabs(') # for SMEFT #614 and #616
         return res
 
     # AV - replace export_cpp.UFOModelConverterCPP method (eventually split writing of parameters and fixes for Majorana particles #622)
@@ -823,7 +823,7 @@ def super_write_set_parameters_onlyfixMajorana(self, hardcoded): # FIXME! split
                     res_strings.append( prefix+"  constexpr double %(W)s = %(W)s_sign * %(W)s_abs;" % { 'W' : particle.get('width') } )
                 else:
                     res_strings.append( prefix+"  if( %s < 0 )" % particle.get('mass'))
-                    res_strings.append( prefix+"    %(width)s = -std::abs( %(width)s );" % {"width": particle.get('width')})
+                    res_strings.append( prefix+"    %(width)s = -fpabs( %(width)s );" % {"width": particle.get('width')})
         if len( res_strings ) != 0 : res_strings = [ prefix + "  // Fixes for Majorana particles" ] + res_strings
         if not hardcoded: return '\n' + '\n'.join(res_strings) if res_strings else ''
         else: return '\n' + '\n'.join(res_strings) + '\n' if res_strings else '\n'